def main():
    """Download FB15k, convert it to PBG's format, train, then evaluate."""
    setup_logging()
    parser = argparse.ArgumentParser(description='Example on FB15k')
    parser.add_argument('--config', default=DEFAULT_CONFIG, help='Path to config file')
    parser.add_argument('-p', '--param', action='append', nargs='*')
    parser.add_argument('--data_dir', type=Path, default='data', help='where to save processed data')
    parser.add_argument('--no-filtered', dest='filtered', action='store_false', help='Run unfiltered eval')
    args = parser.parse_args()
    # -p may be given several times, each with several values: flatten them.
    overrides = chain.from_iterable(args.param) if args.param is not None else None

    # Fetch and unpack the raw dataset.
    data_dir = args.data_dir
    archive_path = download_url(FB15K_URL, data_dir)
    extract_tar(archive_path)
    print('Downloaded and extracted file.')

    loader = ConfigFileLoader()
    config = loader.load_config(args.config, overrides)
    set_logging_verbosity(config.verbose)

    # Worker subprocesses replay the logging setup and sys.path addition.
    subprocess_init = SubprocessInitializer()
    subprocess_init.register(setup_logging, config.verbose)
    subprocess_init.register(add_to_sys_path, loader.config_dir.name)

    input_edge_paths = [data_dir / name for name in FILENAMES]
    output_train_path, output_valid_path, output_test_path = config.edge_paths
    convert_input_data(
        config.entities,
        config.relations,
        config.entity_path,
        config.edge_paths,
        input_edge_paths,
        lhs_col=0,
        rhs_col=2,
        rel_col=1,
        dynamic_relations=config.dynamic_relations,
    )

    train_config = attr.evolve(config, edge_paths=[output_train_path])
    train(train_config, subprocess_init=subprocess_init)

    # Evaluate ranking against all negatives, with uniform sampling disabled.
    eval_relations = [attr.evolve(r, all_negs=True) for r in config.relations]
    eval_config = attr.evolve(
        config,
        edge_paths=[output_test_path],
        relations=eval_relations,
        num_uniform_negs=0,
    )
    if args.filtered:
        # Filtered ranking excludes candidates seen anywhere in the data.
        filter_paths = [output_test_path, output_valid_path, output_train_path]
        do_eval(
            eval_config,
            evaluator=FilteredRankingEvaluator(eval_config, filter_paths),
            subprocess_init=subprocess_init,
        )
    else:
        do_eval(eval_config, subprocess_init=subprocess_init)
def main():
    """Convert FB15k, train, then evaluate using the legacy config-file API."""
    parser = argparse.ArgumentParser(description='Example on FB15k')
    parser.add_argument('--config', default='./fb15k_config.py', help='Path to config file')
    parser.add_argument('-p', '--param', action='append', nargs='*')
    parser.add_argument('--data_dir', default='../../../data', help='where to save processed data')
    parser.add_argument('--no-filtered', dest='filtered', action='store_false', help='Run unfiltered eval')
    args = parser.parse_args()
    # -p may be given several times, each with several values: flatten them.
    overrides = chain.from_iterable(args.param) if args.param is not None else None

    data_dir = args.data_dir
    # Download step deliberately disabled — the data is assumed to be present:
    # fpath = utils.download_url(FB15K_URL, data_dir)
    # utils.extract_tar(fpath)
    # print('Downloaded and extracted file.')

    raw_edge_paths = [os.path.join(data_dir, name) for name in FILENAMES.values()]
    print('edge_paths', raw_edge_paths)
    convert_input_data(
        args.config,
        raw_edge_paths,
        lhs_col=0,
        rhs_col=2,
        rel_col=1,
    )

    config = parse_config(args.config, overrides)

    train_path = [convert_path(os.path.join(data_dir, FILENAMES['train']))]
    train(attr.evolve(config, edge_paths=train_path))

    eval_path = [convert_path(os.path.join(data_dir, FILENAMES['test']))]
    # Evaluate ranking against all negatives.
    all_neg_relations = [attr.evolve(r, all_negs=True) for r in config.relations]
    eval_config = attr.evolve(config, edge_paths=eval_path, relations=all_neg_relations)
    if args.filtered:
        # Filtered ranking excludes candidates seen anywhere in the data.
        filter_paths = [
            convert_path(os.path.join(data_dir, FILENAMES['test'])),
            convert_path(os.path.join(data_dir, FILENAMES['valid'])),
            convert_path(os.path.join(data_dir, FILENAMES['train'])),
        ]
        do_eval(eval_config, FilteredRankingEvaluator(eval_config, filter_paths))
    else:
        do_eval(eval_config)
def main():
    """Download Livejournal, split it, convert, train, then evaluate."""
    parser = argparse.ArgumentParser(description='Example on Livejournal')
    parser.add_argument('--config', default=DEFAULT_CONFIG, help='Path to config file')
    parser.add_argument('-p', '--param', action='append', nargs='*')
    parser.add_argument('--data_dir', default='data', help='where to save processed data')
    args = parser.parse_args()
    # -p may be given several times, each with several values: flatten them.
    overrides = chain.from_iterable(args.param) if args.param is not None else None

    # Fetch and unpack the raw dataset.
    data_dir = args.data_dir
    os.makedirs(data_dir, exist_ok=True)
    fpath = utils.download_url(URL, data_dir)
    fpath = utils.extract_gzip(fpath)
    print('Downloaded and extracted file.')

    # Randomly split the edges into train and test files.
    random_split_file(fpath)

    loader = ConfigFileLoader()
    config = loader.load_config(args.config, overrides)

    raw_edge_paths = [os.path.join(data_dir, name) for name in FILENAMES.values()]
    convert_input_data(
        config.entities,
        config.relations,
        config.entity_path,
        raw_edge_paths,
        lhs_col=0,
        rhs_col=1,
        rel_col=None,
        dynamic_relations=config.dynamic_relations,
    )

    # Worker subprocesses need the config's directory on sys.path.
    init_fn = partial(add_to_sys_path, loader.config_dir.name)

    train_path = [convert_path(os.path.join(data_dir, FILENAMES['train']))]
    train(attr.evolve(config, edge_paths=train_path), subprocess_init=init_fn)

    eval_path = [convert_path(os.path.join(data_dir, FILENAMES['test']))]
    do_eval(attr.evolve(config, edge_paths=eval_path), subprocess_init=init_fn)
def main():
    """Download Livejournal, split it, convert, train, then evaluate."""
    setup_logging()
    parser = argparse.ArgumentParser(description='Example on Livejournal')
    parser.add_argument('--config', default=DEFAULT_CONFIG, help='Path to config file')
    parser.add_argument('-p', '--param', action='append', nargs='*')
    parser.add_argument('--data_dir', type=Path, default='data', help='where to save processed data')
    args = parser.parse_args()
    # -p may be given several times, each with several values: flatten them.
    overrides = chain.from_iterable(args.param) if args.param is not None else None

    # Fetch and unpack the raw dataset.
    data_dir = args.data_dir
    data_dir.mkdir(parents=True, exist_ok=True)
    archive_path = download_url(URL, data_dir)
    archive_path = extract_gzip(archive_path)
    print('Downloaded and extracted file.')

    # Randomly split the edges into train and test files.
    random_split_file(archive_path)

    loader = ConfigFileLoader()
    config = loader.load_config(args.config, overrides)
    set_logging_verbosity(config.verbose)

    # Worker subprocesses replay the logging setup and sys.path addition.
    subprocess_init = SubprocessInitializer()
    subprocess_init.register(setup_logging, config.verbose)
    subprocess_init.register(add_to_sys_path, loader.config_dir.name)

    raw_edge_paths = [data_dir / name for name in FILENAMES.values()]
    convert_input_data(
        config.entities,
        config.relations,
        config.entity_path,
        raw_edge_paths,
        lhs_col=0,
        rhs_col=1,
        rel_col=None,
        dynamic_relations=config.dynamic_relations,
    )

    train_path = [str(convert_path(data_dir / FILENAMES['train']))]
    train(attr.evolve(config, edge_paths=train_path), subprocess_init=subprocess_init)

    eval_path = [str(convert_path(data_dir / FILENAMES['test']))]
    do_eval(attr.evolve(config, edge_paths=eval_path), subprocess_init=subprocess_init)
def run_train_eval():
    """Convert the input data, then run training followed by evaluation."""
    # Convert the raw data into PBG-readable partitioned files.
    convert_input_data(CONFIG_PATH, edge_paths, lhs_col=0, rhs_col=1, rel_col=None)
    # Parse the configuration file.
    config = parse_config(CONFIG_PATH)
    # Training config: swap in the partitioned train paths for edge_paths.
    train_config = attr.evolve(config, edge_paths=train_paths)
    train(train_config)
    # Evaluation config: swap in the partitioned eval paths for edge_paths.
    eval_config = attr.evolve(config, edge_paths=eval_paths)
    do_eval(eval_config)
def run_train_eval():
    """Split the raw file, convert it, then train and evaluate."""
    # Randomly split the data into train and test files.
    random_split_file(DATA_PATH)
    # Convert the raw data into PBG-readable partitioned files.
    convert_input_data(
        CONFIG_PATH,
        edge_paths,
        lhs_col=0,
        rhs_col=1,
        rel_col=None,
    )
    base_config = parse_config(CONFIG_PATH)
    # Train on the train partition, evaluate on the eval partition; only
    # edge_paths differs between the two configs.
    train(attr.evolve(base_config, edge_paths=train_path))
    do_eval(attr.evolve(base_config, edge_paths=eval_path))
# Raw edge files to feed into the converter.
edge_paths = [os.path.join(DATA_DIR, fname) for fname in FILENAMES.values()]

from torchbiggraph.converters.import_from_tsv import convert_input_data

# Partition the edge lists into PBG's on-disk format; edges are
# (lhs, rhs) pairs with no relation column.
convert_input_data(
    CONFIG_PATH,
    edge_paths,
    lhs_col=0,
    rhs_col=1,
    rel_col=None,
)

### SNIPPET 2 ###
def main():
    """Download FB15k, convert it, train, then evaluate (optionally filtered)."""
    parser = argparse.ArgumentParser(description='Example on FB15k')
    parser.add_argument('--config', default=DEFAULT_CONFIG, help='Path to config file')
    parser.add_argument('-p', '--param', action='append', nargs='*')
    parser.add_argument('--data_dir', default='data', help='where to save processed data')
    parser.add_argument('--no-filtered', dest='filtered', action='store_false', help='Run unfiltered eval')
    args = parser.parse_args()
    # -p may be given several times, each with several values: flatten them.
    overrides = chain.from_iterable(args.param) if args.param is not None else None

    # Fetch and unpack the raw dataset.
    data_dir = args.data_dir
    archive_path = utils.download_url(FB15K_URL, data_dir)
    utils.extract_tar(archive_path)
    print('Downloaded and extracted file.')

    loader = ConfigFileLoader()
    config = loader.load_config(args.config, overrides)

    raw_edge_paths = [os.path.join(data_dir, name) for name in FILENAMES.values()]
    convert_input_data(
        config.entities,
        config.relations,
        config.entity_path,
        raw_edge_paths,
        lhs_col=0,
        rhs_col=2,
        rel_col=1,
        dynamic_relations=config.dynamic_relations,
    )

    # Worker subprocesses need the config's directory on sys.path.
    init_fn = partial(add_to_sys_path, loader.config_dir.name)

    train_path = [convert_path(os.path.join(data_dir, FILENAMES['train']))]
    train(attr.evolve(config, edge_paths=train_path), subprocess_init=init_fn)

    eval_path = [convert_path(os.path.join(data_dir, FILENAMES['test']))]
    # Evaluate ranking against all negatives, with uniform sampling disabled.
    all_neg_relations = [attr.evolve(r, all_negs=True) for r in config.relations]
    eval_config = attr.evolve(
        config,
        edge_paths=eval_path,
        relations=all_neg_relations,
        num_uniform_negs=0,
    )
    if args.filtered:
        # Filtered ranking excludes candidates seen anywhere in the data.
        filter_paths = [
            convert_path(os.path.join(data_dir, FILENAMES['test'])),
            convert_path(os.path.join(data_dir, FILENAMES['valid'])),
            convert_path(os.path.join(data_dir, FILENAMES['train'])),
        ]
        do_eval(
            eval_config,
            evaluator=FilteredRankingEvaluator(eval_config, filter_paths),
            subprocess_init=init_fn,
        )
    else:
        do_eval(eval_config, subprocess_init=init_fn)
0, # to reproduce results, we need to use all training data workers=1, distributed_init_method="tpc://localhost:30050", ) for num_part in args.num_parts: datadir = "{}_big_{}".format(args.dataset, num_part) config_dict['entity_path'] = os.path.join(args.root_output, datadir) config_dict['entities']['all']['num_partitions'] = num_part config_dict['edge_paths'] = [ os.path.join(args.root_output, datadir, datadir) ] config = ConfigSchema.from_dict(config_dict) convert_input_data( config.entities, config.relations, config.entity_path, config.edge_paths, [ Path( os.path.join( args.root_output, "{}_text/edgelist_pybig.txt".format(args.dataset))) ], lhs_col=0, rhs_col=2, rel_col=1, dynamic_relations=config.dynamic_relations, )
# NOTE(review): fragment — these are the trailing keyword arguments of a config
# literal/call whose opening lies before the visible chunk.
comparator='dot',
loss_fn='softmax',
lr=0.1,
num_uniform_negs=50,
eval_fraction=0,  # to reproduce results, we need to use all training data
workers=1,
# NOTE(review): "tpc://" looks like a typo for "tcp://" — confirm against the
# torch.distributed init_method URL scheme before relying on distributed mode.
distributed_init_method="tpc://localhost:30050",
)
# Run the conversion once per partition count listed (comma-separated) in
# args.num_parts, with a tqdm progress bar over the runs.
for num_part in tqdm(args.num_parts.split(","), desc='Run part'):
    num_part = int(num_part)  # split() yields strings; the config needs an int
    datadir = "{}_big_rw_{}".format(args.dataset, num_part)
    # Point the config at this partitioning's entity/edge output locations.
    config_dict['entity_path'] = os.path.join(args.root_output, datadir)
    config_dict['entities']['all']['num_partitions'] = num_part
    config_dict['edge_paths'] = [
        os.path.join(args.root_output, datadir, datadir)
    ]
    config = ConfigSchema.from_dict(config_dict)
    # Convert the edge list at edge_file_temp (lhs, rel, rhs columns) into
    # PBG's partitioned on-disk format for this config.
    convert_input_data(
        config.entities,
        config.relations,
        config.entity_path,
        config.edge_paths,
        [Path(edge_file_temp)],
        lhs_col=0,
        rhs_col=2,
        rel_col=1,
        dynamic_relations=config.dynamic_relations,
    )