def test_entity_dimensions(self):
    # Smoke test: the single entity type "e" declares dimension=8 while the
    # config-level dimension is 10. Training runs on the first generated edge
    # set, evaluation on the second (with all_negs=True on the relation).
    name = "e"
    rel = RelationSchema(name="r", lhs=name, rhs=name)
    entity_schemas = {name: EntitySchema(num_partitions=1, dimension=8)}
    template = ConfigSchema(
        dimension=10,
        relations=[rel],
        entities=entity_schemas,
        entity_path=None,  # placeholder, set through attr.evolve below
        edge_paths=[],  # placeholder, set through attr.evolve below
        checkpoint_path=self.checkpoint_path.name,
        workers=2,
    )

    data = generate_dataset(template, num_entities=100, fractions=[0.4, 0.2])
    self.addCleanup(data.cleanup)

    train_config = attr.evolve(
        template,
        entity_path=data.entity_path.name,
        edge_paths=[data.relation_paths[0].name],
    )
    eval_relations = [attr.evolve(rel, all_negs=True)]
    eval_config = attr.evolve(
        template,
        entity_path=data.entity_path.name,
        edge_paths=[data.relation_paths[1].name],
        relations=eval_relations,
    )

    # The only expectations are: nothing raises, and a version-1 checkpoint
    # exists after training.
    train(train_config, rank=0, subprocess_init=self.subprocess_init)
    self.assertCheckpointWritten(train_config, version=1)
    do_eval(eval_config, subprocess_init=self.subprocess_init)
def test_featurized(self):
    # Smoke test with two entity types, one of them featurized ("e1"), linked
    # by a relation in each direction. Training uses the first generated edge
    # set and evaluation the second.
    entity_schemas = {
        "e1": EntitySchema(num_partitions=1, featurized=True),
        "e2": EntitySchema(num_partitions=1),
    }
    forward = RelationSchema(name="r1", lhs="e1", rhs="e2")
    backward = RelationSchema(name="r2", lhs="e2", rhs="e1")
    template = ConfigSchema(
        dimension=10,
        relations=[forward, backward],
        entities=entity_schemas,
        entity_path=None,  # placeholder, set through attr.evolve below
        edge_paths=[],  # placeholder, set through attr.evolve below
        checkpoint_path=self.checkpoint_path.name,
        workers=2,
    )

    data = generate_dataset(template, num_entities=100, fractions=[0.4, 0.2])
    self.addCleanup(data.cleanup)

    train_config = attr.evolve(
        template,
        entity_path=data.entity_path.name,
        edge_paths=[data.relation_paths[0].name],
    )
    eval_config = attr.evolve(
        template,
        entity_path=data.entity_path.name,
        edge_paths=[data.relation_paths[1].name],
    )

    # The only expectations are: nothing raises, and a version-1 checkpoint
    # exists after training.
    train(train_config, rank=0, subprocess_init=self.subprocess_init)
    self.assertCheckpointWritten(train_config, version=1)
    do_eval(eval_config, subprocess_init=self.subprocess_init)
def _test_gpu(self, do_half_precision=False, num_partitions=2):
    # Shared body for the GPU smoke tests: builds a config with num_gpus=2,
    # trains on the first generated edge set and evaluates on the second.
    #
    # do_half_precision is forwarded to ConfigSchema(half_precision=...);
    # num_partitions is forwarded to the single entity type's EntitySchema.
    name = "e"
    rel = RelationSchema(name="r", lhs=name, rhs=name)
    entity_schemas = {name: EntitySchema(num_partitions=num_partitions)}
    template = ConfigSchema(
        dimension=16,
        batch_size=1024,
        num_batch_negs=64,
        num_uniform_negs=64,
        relations=[rel],
        entities=entity_schemas,
        entity_path=None,  # placeholder, set through attr.evolve below
        edge_paths=[],  # placeholder, set through attr.evolve below
        checkpoint_path=self.checkpoint_path.name,
        workers=2,
        num_gpus=2,
        regularization_coef=1e-4,
        half_precision=do_half_precision,
    )

    data = generate_dataset(template, num_entities=100, fractions=[0.4, 0.2])
    self.addCleanup(data.cleanup)

    train_config = attr.evolve(
        template,
        entity_path=data.entity_path.name,
        edge_paths=[data.relation_paths[0].name],
    )
    eval_relations = [attr.evolve(rel, all_negs=True)]
    eval_config = attr.evolve(
        template,
        entity_path=data.entity_path.name,
        edge_paths=[data.relation_paths[1].name],
        relations=eval_relations,
    )

    # The only expectations are: nothing raises, and a version-1 checkpoint
    # exists after training.
    train(train_config, rank=0, subprocess_init=self.subprocess_init)
    self.assertCheckpointWritten(train_config, version=1)
    do_eval(eval_config, subprocess_init=self.subprocess_init)
def main():
    """Run the FB15k example end to end: download, convert, train, evaluate.

    Command-line options:
      --config       path to the config file (defaults to DEFAULT_CONFIG)
      -p/--param     config overrides; may be repeated, each use taking
                     any number of values
      --data_dir     directory the archive is downloaded into
      --no-filtered  evaluate without FilteredRankingEvaluator (filtered
                     evaluation is the default)
    """
    setup_logging()

    cli = argparse.ArgumentParser(description='Example on FB15k')
    cli.add_argument('--config', default=DEFAULT_CONFIG,
                     help='Path to config file')
    cli.add_argument('-p', '--param', action='append', nargs='*')
    cli.add_argument('--data_dir', type=Path, default='data',
                     help='where to save processed data')
    cli.add_argument('--no-filtered', dest='filtered', action='store_false',
                     help='Run unfiltered eval')
    opts = cli.parse_args()

    # action='append' combined with nargs='*' yields a list of lists, so the
    # overrides are flattened (lazily) before being handed to the loader.
    overrides = (
        chain.from_iterable(opts.param) if opts.param is not None else None
    )

    # Fetch and unpack the dataset archive.
    workdir = opts.data_dir
    archive = download_url(FB15K_URL, workdir)
    extract_tar(archive)
    print('Downloaded and extracted file.')

    loader = ConfigFileLoader()
    config = loader.load_config(opts.config, overrides)
    set_logging_verbosity(config.verbose)

    # Callbacks registered for subprocesses: logging setup plus making the
    # loader's config directory importable.
    init = SubprocessInitializer()
    init.register(setup_logging, config.verbose)
    init.register(add_to_sys_path, loader.config_dir.name)

    raw_edge_files = [workdir / fname for fname in FILENAMES]
    # config.edge_paths must hold exactly three entries: train, valid, test.
    train_out, valid_out, test_out = config.edge_paths

    convert_input_data(
        config.entities,
        config.relations,
        config.entity_path,
        config.edge_paths,
        raw_edge_files,
        lhs_col=0,
        rhs_col=2,
        rel_col=1,
        dynamic_relations=config.dynamic_relations,
    )

    train(attr.evolve(config, edge_paths=[train_out]), subprocess_init=init)

    # Evaluation runs on the test edges with all_negs enabled on every
    # relation and no uniformly sampled negatives.
    eval_relations = [attr.evolve(rel, all_negs=True) for rel in config.relations]
    eval_config = attr.evolve(
        config,
        edge_paths=[test_out],
        relations=eval_relations,
        num_uniform_negs=0,
    )

    eval_kwargs = {}
    if opts.filtered:
        eval_kwargs['evaluator'] = FilteredRankingEvaluator(
            eval_config, [test_out, valid_out, train_out])
    eval_kwargs['subprocess_init'] = init
    do_eval(eval_config, **eval_kwargs)
def main():
    """Run the FB15k example: convert the input files, train, then evaluate.

    Command-line options:
      --config       path to the config file (default './fb15k_config.py')
      -p/--param     config overrides; may be repeated, each use taking
                     any number of values
      --data_dir     directory holding the input files
      --no-filtered  evaluate without FilteredRankingEvaluator (filtered
                     evaluation is the default)
    """
    cli = argparse.ArgumentParser(description='Example on FB15k')
    cli.add_argument('--config', default='./fb15k_config.py',
                     help='Path to config file')
    cli.add_argument('-p', '--param', action='append', nargs='*')
    cli.add_argument('--data_dir', default='../../../data',
                     help='where to save processed data')
    cli.add_argument('--no-filtered', dest='filtered', action='store_false',
                     help='Run unfiltered eval')
    opts = cli.parse_args()

    # action='append' combined with nargs='*' yields a list of lists, so the
    # overrides are flattened (lazily) before being handed to parse_config.
    overrides = (
        chain.from_iterable(opts.param) if opts.param is not None else None
    )

    workdir = opts.data_dir
    # NOTE: the download/extract step (utils.download_url(FB15K_URL, ...) and
    # utils.extract_tar) is disabled in this variant, so nothing is fetched.

    def converted(split):
        # Converted counterpart of the raw file registered under `split`.
        return convert_path(os.path.join(workdir, FILENAMES[split]))

    edge_paths = [os.path.join(workdir, fname) for fname in FILENAMES.values()]
    print('edge_paths', edge_paths)
    convert_input_data(
        opts.config,
        edge_paths,
        lhs_col=0,
        rhs_col=2,
        rel_col=1,
    )

    config = parse_config(opts.config, overrides)

    train(attr.evolve(config, edge_paths=[converted('train')]))

    # Evaluation uses the test edges with all_negs enabled on every relation.
    test_edges = [converted('test')]
    eval_relations = [attr.evolve(rel, all_negs=True) for rel in config.relations]
    eval_config = attr.evolve(
        config, edge_paths=test_edges, relations=eval_relations)

    if not opts.filtered:
        do_eval(eval_config)
        return

    filter_paths = [converted(split) for split in ('test', 'valid', 'train')]
    do_eval(eval_config, FilteredRankingEvaluator(eval_config, filter_paths))
def main():
    """Run the LiveJournal example: download, split, convert, train, evaluate.

    Command-line options:
      --config    path to the config file (defaults to DEFAULT_CONFIG)
      -p/--param  config overrides; may be repeated, each use taking any
                  number of values
      --data_dir  directory the data is downloaded into (created if missing)
    """
    cli = argparse.ArgumentParser(description='Example on Livejournal')
    cli.add_argument('--config', default=DEFAULT_CONFIG,
                     help='Path to config file')
    cli.add_argument('-p', '--param', action='append', nargs='*')
    cli.add_argument('--data_dir', default='data',
                     help='where to save processed data')
    opts = cli.parse_args()

    # action='append' combined with nargs='*' yields a list of lists, so the
    # overrides are flattened (lazily) before being handed to the loader.
    overrides = (
        chain.from_iterable(opts.param) if opts.param is not None else None
    )

    # Fetch and decompress the raw edge list.
    workdir = opts.data_dir
    os.makedirs(workdir, exist_ok=True)
    downloaded = utils.download_url(URL, workdir)
    edge_file = utils.extract_gzip(downloaded)
    print('Downloaded and extracted file.')

    # Randomly split the extracted file into train and test parts.
    random_split_file(edge_file)

    loader = ConfigFileLoader()
    config = loader.load_config(opts.config, overrides)

    raw_edge_files = [
        os.path.join(workdir, fname) for fname in FILENAMES.values()
    ]
    convert_input_data(
        config.entities,
        config.relations,
        config.entity_path,
        raw_edge_files,
        lhs_col=0,
        rhs_col=1,
        rel_col=None,
        dynamic_relations=config.dynamic_relations,
    )

    train_edges = [convert_path(os.path.join(workdir, FILENAMES['train']))]
    train(
        attr.evolve(config, edge_paths=train_edges),
        subprocess_init=partial(add_to_sys_path, loader.config_dir.name),
    )

    test_edges = [convert_path(os.path.join(workdir, FILENAMES['test']))]
    do_eval(
        attr.evolve(config, edge_paths=test_edges),
        subprocess_init=partial(add_to_sys_path, loader.config_dir.name),
    )
def main():
    """Run the LiveJournal example: download, split, convert, train, evaluate.

    Command-line options:
      --config    path to the config file (defaults to DEFAULT_CONFIG)
      -p/--param  config overrides; may be repeated, each use taking any
                  number of values
      --data_dir  directory the data is downloaded into (created if missing)
    """
    setup_logging()

    cli = argparse.ArgumentParser(description='Example on Livejournal')
    cli.add_argument('--config', default=DEFAULT_CONFIG,
                     help='Path to config file')
    cli.add_argument('-p', '--param', action='append', nargs='*')
    cli.add_argument('--data_dir', type=Path, default='data',
                     help='where to save processed data')
    opts = cli.parse_args()

    # action='append' combined with nargs='*' yields a list of lists, so the
    # overrides are flattened (lazily) before being handed to the loader.
    overrides = (
        chain.from_iterable(opts.param) if opts.param is not None else None
    )

    # Fetch and decompress the raw edge list.
    workdir = opts.data_dir
    workdir.mkdir(parents=True, exist_ok=True)
    downloaded = download_url(URL, workdir)
    edge_file = extract_gzip(downloaded)
    print('Downloaded and extracted file.')

    # Randomly split the extracted file into train and test parts.
    random_split_file(edge_file)

    loader = ConfigFileLoader()
    config = loader.load_config(opts.config, overrides)
    set_logging_verbosity(config.verbose)

    # Callbacks registered for subprocesses: logging setup plus making the
    # loader's config directory importable.
    init = SubprocessInitializer()
    init.register(setup_logging, config.verbose)
    init.register(add_to_sys_path, loader.config_dir.name)

    raw_edge_files = [workdir / fname for fname in FILENAMES.values()]
    convert_input_data(
        config.entities,
        config.relations,
        config.entity_path,
        raw_edge_files,
        lhs_col=0,
        rhs_col=1,
        rel_col=None,
        dynamic_relations=config.dynamic_relations,
    )

    # The config receives the converted paths as plain strings.
    train_edges = [str(convert_path(workdir / FILENAMES['train']))]
    train(attr.evolve(config, edge_paths=train_edges), subprocess_init=init)

    test_edges = [str(convert_path(workdir / FILENAMES['test']))]
    do_eval(attr.evolve(config, edge_paths=test_edges), subprocess_init=init)
def main():
    """Run the LiveJournal example: download, split, convert, train, evaluate.

    Command-line options:
      --config    path to the config file (defaults to DEFAULT_CONFIG)
      -p/--param  config overrides; may be repeated, each use taking any
                  number of values
      --data_dir  directory the data is downloaded into (created if missing)
    """
    setup_logging()

    cli = argparse.ArgumentParser(description='Example on Livejournal')
    cli.add_argument('--config', default=DEFAULT_CONFIG,
                     help='Path to config file')
    cli.add_argument('-p', '--param', action='append', nargs='*')
    cli.add_argument('--data_dir', type=Path, default='data',
                     help='where to save processed data')
    opts = cli.parse_args()

    # Fetch and decompress the raw edge list.
    workdir = opts.data_dir
    workdir.mkdir(parents=True, exist_ok=True)
    downloaded = download_url(URL, workdir)
    edge_file = extract_gzip(downloaded)
    print('Downloaded and extracted file.')

    # Randomly split the extracted file into train and test parts.
    random_split_file(edge_file)

    loader = ConfigFileLoader()
    # NOTE(review): opts.param is None or a list of lists (action='append'
    # with nargs='*') and is passed through unflattened -- confirm that
    # load_config accepts that shape.
    config = loader.load_config(opts.config, opts.param)
    set_logging_verbosity(config.verbose)

    # Callbacks registered for subprocesses: logging setup plus making the
    # loader's config directory importable.
    init = SubprocessInitializer()
    init.register(setup_logging, config.verbose)
    init.register(add_to_sys_path, loader.config_dir.name)

    raw_edge_files = [workdir / fname for fname in FILENAMES]
    # config.edge_paths must hold exactly two entries: train, then test.
    train_out, test_out = config.edge_paths

    reader = TSVEdgelistReader(lhs_col=0, rhs_col=1, rel_col=None)
    convert_input_data(
        config.entities,
        config.relations,
        config.entity_path,
        config.edge_paths,
        raw_edge_files,
        reader,
        dynamic_relations=config.dynamic_relations,
    )

    train(attr.evolve(config, edge_paths=[train_out]), subprocess_init=init)
    do_eval(attr.evolve(config, edge_paths=[test_out]), subprocess_init=init)
def run_train_eval():
    """Convert the input edges, train a model, then evaluate it.

    Relies on the module-level names CONFIG_PATH, edge_paths, train_paths
    and eval_paths.
    """
    # Convert the raw data into the partitioned files PBG can read.
    convert_input_data(
        CONFIG_PATH,
        edge_paths,
        lhs_col=0,
        rhs_col=1,
        rel_col=None,
    )

    # Parse the configuration file.
    base_config = parse_config(CONFIG_PATH)

    # Training config: the partitioned train_paths replace the edge_paths
    # from the configuration file. Then start training with it.
    training = attr.evolve(base_config, edge_paths=train_paths)
    train(training)

    # Evaluation config: the partitioned eval_paths replace the edge_paths
    # from the configuration file. Then start the evaluation.
    evaluation = attr.evolve(base_config, edge_paths=eval_paths)
    do_eval(evaluation)
def test_dynamic_relations(self):
    # Smoke test for dynamic_relations=True. The dataset is generated from a
    # derived config carrying ten copies of the relation (with
    # dynamic_relations switched off), and the relation count is then written
    # to dynamic_rel_count.txt inside the entity directory.
    rel = RelationSchema(name="r", lhs="el", rhs="er")
    entity_schemas = {
        side: EntitySchema(num_partitions=1) for side in ("el", "er")
    }
    template = ConfigSchema(
        dimension=10,
        relations=[rel],
        entities=entity_schemas,
        entity_path=None,  # placeholder, set through attr.evolve below
        edge_paths=[],  # placeholder, set through attr.evolve below
        checkpoint_path=self.checkpoint_path.name,
        dynamic_relations=True,
        global_emb=False,  # Must be off for dynamic relations.
        workers=2,
    )
    gen_config = attr.evolve(
        template,
        relations=[rel] * 10,
        dynamic_relations=False,  # Must be off if more than 1 relation.
    )

    data = generate_dataset(gen_config, num_entities=100, fractions=[0.04, 0.02])
    self.addCleanup(data.cleanup)

    # Mode "xt" creates the file exclusively: opening fails if it exists.
    count_file = os.path.join(data.entity_path.name, "dynamic_rel_count.txt")
    with open(count_file, "xt") as out:
        out.write(str(len(gen_config.relations)))

    train_config = attr.evolve(
        template,
        entity_path=data.entity_path.name,
        edge_paths=[data.relation_paths[0].name],
    )
    eval_relations = [attr.evolve(rel, all_negs=True)]
    eval_config = attr.evolve(
        template,
        relations=eval_relations,
        entity_path=data.entity_path.name,
        edge_paths=[data.relation_paths[1].name],
    )

    # The only expectations are: nothing raises, and a version-1 checkpoint
    # exists after training.
    train(train_config, rank=0, subprocess_init=self.subprocess_init)
    self.assertCheckpointWritten(train_config, version=1)
    do_eval(eval_config, subprocess_init=self.subprocess_init)
def run_train_eval():
    """Split the data file, convert it, train a model, then evaluate it.

    Relies on the module-level names DATA_PATH, CONFIG_PATH, edge_paths,
    train_path and eval_path.
    """
    random_split_file(DATA_PATH)

    convert_input_data(
        CONFIG_PATH, edge_paths, lhs_col=0, rhs_col=1, rel_col=None)

    # Training uses the parsed config with edge_paths pointed at train_path.
    parsed = parse_config(CONFIG_PATH)
    training = attr.evolve(parsed, edge_paths=train_path)
    train(training)

    # The evaluation config is derived from the training config; only
    # edge_paths differs.
    evaluation = attr.evolve(training, edge_paths=eval_path)
    do_eval(evaluation)
def main():
    """Run the FB15k example end to end: download, convert, train, evaluate.

    Command-line options:
      --config       path to the config file (defaults to DEFAULT_CONFIG)
      -p/--param     config overrides; may be repeated, each use taking
                     any number of values
      --data_dir     directory the archive is downloaded into
      --no-filtered  evaluate without FilteredRankingEvaluator (filtered
                     evaluation is the default)
    """
    setup_logging()

    cli = argparse.ArgumentParser(description="Example on FB15k")
    cli.add_argument("--config", default=DEFAULT_CONFIG,
                     help="Path to config file")
    cli.add_argument("-p", "--param", action="append", nargs="*")
    cli.add_argument("--data_dir", type=Path, default="data",
                     help="where to save processed data")
    cli.add_argument("--no-filtered", dest="filtered", action="store_false",
                     help="Run unfiltered eval")
    opts = cli.parse_args()

    # Fetch and unpack the dataset archive.
    workdir = opts.data_dir
    archive = download_url(FB15K_URL, workdir)
    extract_tar(archive)
    print("Downloaded and extracted file.")

    loader = ConfigFileLoader()
    # NOTE(review): opts.param is None or a list of lists (action="append"
    # with nargs="*") and is passed through unflattened -- confirm that
    # load_config accepts that shape.
    config = loader.load_config(opts.config, opts.param)
    set_logging_verbosity(config.verbose)

    # Callbacks registered for subprocesses: logging setup plus making the
    # loader's config directory importable.
    init = SubprocessInitializer()
    init.register(setup_logging, config.verbose)
    init.register(add_to_sys_path, loader.config_dir.name)

    raw_edge_files = [workdir / fname for fname in FILENAMES]
    # config.edge_paths must hold exactly three entries: train, valid, test.
    train_out, valid_out, test_out = config.edge_paths

    reader = TSVEdgelistReader(lhs_col=0, rhs_col=2, rel_col=1)
    convert_input_data(
        config.entities,
        config.relations,
        config.entity_path,
        config.edge_paths,
        raw_edge_files,
        reader,
        dynamic_relations=config.dynamic_relations,
    )

    train(attr.evolve(config, edge_paths=[train_out]), subprocess_init=init)

    # Evaluation runs on the test edges with all_negs enabled on every
    # relation and no uniformly sampled negatives.
    eval_relations = [attr.evolve(rel, all_negs=True) for rel in config.relations]
    eval_config = attr.evolve(
        config,
        edge_paths=[test_out],
        relations=eval_relations,
        num_uniform_negs=0,
    )

    eval_kwargs = {}
    if opts.filtered:
        eval_kwargs["evaluator"] = FilteredRankingEvaluator(
            eval_config, [test_out, valid_out, train_out])
    eval_kwargs["subprocess_init"] = init
    do_eval(eval_config, **eval_kwargs)
# Flat script: train a model, evaluate it, then read embeddings back from the
# checkpoint. CONFIG_PATH, DATA_DIR, FILENAMES, convert_path and os are
# expected to be defined/imported before this point.
from torchbiggraph.config import parse_config
import attr

# Parse the config and point its edge_paths at the converted training file.
train_config = parse_config(CONFIG_PATH)
train_path = [convert_path(os.path.join(DATA_DIR, FILENAMES['train']))]
train_config = attr.evolve(train_config, edge_paths=train_path)

from torchbiggraph.train import train
train(train_config)

# The evaluation config is derived from the training config; only edge_paths
# changes, to the converted test file.
from torchbiggraph.eval import do_eval
eval_path = [convert_path(os.path.join(DATA_DIR, FILENAMES['test']))]
eval_config = attr.evolve(train_config, edge_paths=eval_path)
do_eval(eval_config)

import json
import h5py

# dictionary.json is read from DATA_DIR; the position of an id within
# dictionary["entities"]["user_id"] is used below as its row in the
# embeddings dataset.
with open(os.path.join(DATA_DIR, "dictionary.json"), "rt") as tf:
    dictionary = json.load(tf)

user_id = "0"
offset = dictionary["entities"]["user_id"].index(user_id)
print("our offset for user_id ", user_id, " is: ", offset)

# NOTE(review): the checkpoint path is hard-coded; the file name looks like
# entity type "user_id", partition 0, version 10 -- confirm it matches the
# checkpoint_path and epoch count of the config used above.
with h5py.File("model/example_2/embeddings_user_id_0.v10.h5", "r") as hf:
    # Row `offset` only, then the complete "embeddings" dataset.
    embedding_user_0 = hf["embeddings"][offset, :]
    embedding_all = hf["embeddings"][:]
def main():
    """Run the FB15k example end to end: download, convert, train, evaluate.

    Command-line options:
      --config       path to the config file (defaults to DEFAULT_CONFIG)
      -p/--param     config overrides; may be repeated, each use taking
                     any number of values
      --data_dir     directory the archive is downloaded into
      --no-filtered  evaluate without FilteredRankingEvaluator (filtered
                     evaluation is the default)
    """
    cli = argparse.ArgumentParser(description='Example on FB15k')
    cli.add_argument('--config', default=DEFAULT_CONFIG,
                     help='Path to config file')
    cli.add_argument('-p', '--param', action='append', nargs='*')
    cli.add_argument('--data_dir', default='data',
                     help='where to save processed data')
    cli.add_argument('--no-filtered', dest='filtered', action='store_false',
                     help='Run unfiltered eval')
    opts = cli.parse_args()

    # action='append' combined with nargs='*' yields a list of lists, so the
    # overrides are flattened (lazily) before being handed to the loader.
    overrides = (
        chain.from_iterable(opts.param) if opts.param is not None else None
    )

    # Fetch and unpack the dataset archive.
    workdir = opts.data_dir
    archive = utils.download_url(FB15K_URL, workdir)
    utils.extract_tar(archive)
    print('Downloaded and extracted file.')

    loader = ConfigFileLoader()
    config = loader.load_config(opts.config, overrides)

    def converted(split):
        # Converted counterpart of the raw file registered under `split`.
        return convert_path(os.path.join(workdir, FILENAMES[split]))

    raw_edge_files = [
        os.path.join(workdir, fname) for fname in FILENAMES.values()
    ]
    convert_input_data(
        config.entities,
        config.relations,
        config.entity_path,
        raw_edge_files,
        lhs_col=0,
        rhs_col=2,
        rel_col=1,
        dynamic_relations=config.dynamic_relations,
    )

    train_edges = [converted('train')]
    train(
        attr.evolve(config, edge_paths=train_edges),
        subprocess_init=partial(add_to_sys_path, loader.config_dir.name),
    )

    # Evaluation runs on the test edges with all_negs enabled on every
    # relation and no uniformly sampled negatives.
    test_edges = [converted('test')]
    eval_relations = [attr.evolve(rel, all_negs=True) for rel in config.relations]
    eval_config = attr.evolve(
        config,
        edge_paths=test_edges,
        relations=eval_relations,
        num_uniform_negs=0,
    )

    eval_kwargs = {}
    if opts.filtered:
        filter_paths = [converted(split) for split in ('test', 'valid', 'train')]
        eval_kwargs['evaluator'] = FilteredRankingEvaluator(
            eval_config, filter_paths)
    eval_kwargs['subprocess_init'] = partial(
        add_to_sys_path, loader.config_dir.name)
    do_eval(eval_config, **eval_kwargs)