Esempio n. 1
0
 def test_entity_dimensions(self):
     """Smoke-test training and evaluation with a per-entity dimension.

     The config-wide ``dimension`` is 10 while entity type ``"e"`` is
     declared with ``dimension=8``. Training runs on
     ``dataset.relation_paths[0]``, a checkpoint with ``version=1`` is
     asserted, and evaluation then runs on ``dataset.relation_paths[1]``
     with ``all_negs=True`` set on the relation.
     """
     etype = "e"
     relation = RelationSchema(name="r", lhs=etype, rhs=etype)
     entity_schemas = {etype: EntitySchema(num_partitions=1, dimension=8)}
     template = ConfigSchema(
         dimension=10,
         relations=[relation],
         entities=entity_schemas,
         # Both path fields are placeholders until the dataset exists.
         entity_path=None,
         edge_paths=[],
         checkpoint_path=self.checkpoint_path.name,
         workers=2,
     )

     dataset = generate_dataset(
         template, num_entities=100, fractions=[0.4, 0.2]
     )
     self.addCleanup(dataset.cleanup)
     entity_dir = dataset.entity_path.name

     train_config = attr.evolve(
         template,
         entity_path=entity_dir,
         edge_paths=[dataset.relation_paths[0].name],
     )
     eval_config = attr.evolve(
         template,
         entity_path=entity_dir,
         edge_paths=[dataset.relation_paths[1].name],
         relations=[attr.evolve(relation, all_negs=True)],
     )

     # Apart from the checkpoint assertion, the only expectation is that
     # neither phase raises.
     train(train_config, rank=0, subprocess_init=self.subprocess_init)
     self.assertCheckpointWritten(train_config, version=1)
     do_eval(eval_config, subprocess_init=self.subprocess_init)
Esempio n. 2
0
 def test_featurized(self):
     """Smoke-test training and evaluation with one featurized entity type.

     Entity type ``"e1"`` is created with ``featurized=True`` and ``"e2"``
     without it; relations ``r1`` (e1 -> e2) and ``r2`` (e2 -> e1) connect
     them in both directions. Training uses ``dataset.relation_paths[0]``,
     a checkpoint with ``version=1`` is asserted, and evaluation uses
     ``dataset.relation_paths[1]``.
     """
     featurized_entity = EntitySchema(num_partitions=1, featurized=True)
     plain_entity = EntitySchema(num_partitions=1)
     forward_rel = RelationSchema(name="r1", lhs="e1", rhs="e2")
     backward_rel = RelationSchema(name="r2", lhs="e2", rhs="e1")
     template = ConfigSchema(
         dimension=10,
         relations=[forward_rel, backward_rel],
         entities={"e1": featurized_entity, "e2": plain_entity},
         # Both path fields are placeholders until the dataset exists.
         entity_path=None,
         edge_paths=[],
         checkpoint_path=self.checkpoint_path.name,
         workers=2,
     )

     dataset = generate_dataset(
         template, num_entities=100, fractions=[0.4, 0.2]
     )
     self.addCleanup(dataset.cleanup)
     entity_dir = dataset.entity_path.name

     train_config = attr.evolve(
         template,
         entity_path=entity_dir,
         edge_paths=[dataset.relation_paths[0].name],
     )
     eval_config = attr.evolve(
         template,
         entity_path=entity_dir,
         edge_paths=[dataset.relation_paths[1].name],
     )

     # Apart from the checkpoint assertion, the only expectation is that
     # neither phase raises.
     train(train_config, rank=0, subprocess_init=self.subprocess_init)
     self.assertCheckpointWritten(train_config, version=1)
     do_eval(eval_config, subprocess_init=self.subprocess_init)
 def _test_gpu(self, do_half_precision=False, num_partitions=2):
     """Shared body for GPU smoke tests: train, check checkpoint, evaluate.

     Args:
         do_half_precision: forwarded as ``half_precision`` in the config.
         num_partitions: partition count for the single entity type ``"e"``.

     The config requests ``num_gpus=2``. Training uses
     ``dataset.relation_paths[0]``; evaluation uses
     ``dataset.relation_paths[1]`` with ``all_negs=True`` on the relation.
     """
     etype = "e"
     relation = RelationSchema(name="r", lhs=etype, rhs=etype)
     entity_schemas = {etype: EntitySchema(num_partitions=num_partitions)}
     template = ConfigSchema(
         dimension=16,
         batch_size=1024,
         num_batch_negs=64,
         num_uniform_negs=64,
         relations=[relation],
         entities=entity_schemas,
         # Both path fields are placeholders until the dataset exists.
         entity_path=None,
         edge_paths=[],
         checkpoint_path=self.checkpoint_path.name,
         workers=2,
         num_gpus=2,
         regularization_coef=1e-4,
         half_precision=do_half_precision,
     )

     dataset = generate_dataset(
         template, num_entities=100, fractions=[0.4, 0.2]
     )
     self.addCleanup(dataset.cleanup)
     entity_dir = dataset.entity_path.name

     train_config = attr.evolve(
         template,
         entity_path=entity_dir,
         edge_paths=[dataset.relation_paths[0].name],
     )
     eval_config = attr.evolve(
         template,
         entity_path=entity_dir,
         edge_paths=[dataset.relation_paths[1].name],
         relations=[attr.evolve(relation, all_negs=True)],
     )

     # Apart from the checkpoint assertion, the only expectation is that
     # neither phase raises.
     train(train_config, rank=0, subprocess_init=self.subprocess_init)
     self.assertCheckpointWritten(train_config, version=1)
     do_eval(eval_config, subprocess_init=self.subprocess_init)
Esempio n. 4
0
def main():
    """Run the FB15k example: download, convert, train, then evaluate.

    Command-line options:
        --config       path to the config file (defaults to DEFAULT_CONFIG)
        -p / --param   config overrides; may be repeated, each occurrence
                       taking zero or more values
        --data_dir     directory for the downloaded/processed data
        --no-filtered  evaluate without FilteredRankingEvaluator

    The config's ``edge_paths`` must contain exactly three entries, which
    are unpacked as train, valid and test output paths.
    """
    setup_logging()

    cli = argparse.ArgumentParser(description='Example on FB15k')
    cli.add_argument(
        '--config', default=DEFAULT_CONFIG, help='Path to config file')
    cli.add_argument('-p', '--param', action='append', nargs='*')
    cli.add_argument(
        '--data_dir', type=Path, default='data',
        help='where to save processed data')
    cli.add_argument(
        '--no-filtered', dest='filtered', action='store_false',
        help='Run unfiltered eval')
    opts = cli.parse_args()

    # Every -p occurrence contributes its own list; flatten them into one
    # stream of overrides (or pass None when -p was never given).
    overrides = (
        chain.from_iterable(opts.param) if opts.param is not None else None
    )

    # Fetch and unpack the raw dataset.
    data_dir = opts.data_dir
    archive_path = download_url(FB15K_URL, data_dir)
    extract_tar(archive_path)
    print('Downloaded and extracted file.')

    loader = ConfigFileLoader()
    config = loader.load_config(opts.config, overrides)
    set_logging_verbosity(config.verbose)

    # Callbacks registered here are handed to train/do_eval as
    # subprocess_init.
    subprocess_init = SubprocessInitializer()
    subprocess_init.register(setup_logging, config.verbose)
    subprocess_init.register(add_to_sys_path, loader.config_dir.name)

    raw_edge_paths = [data_dir / fname for fname in FILENAMES]
    train_out, valid_out, test_out = config.edge_paths

    convert_input_data(
        config.entities,
        config.relations,
        config.entity_path,
        config.edge_paths,
        raw_edge_paths,
        lhs_col=0,
        rhs_col=2,
        rel_col=1,
        dynamic_relations=config.dynamic_relations,
    )

    train(
        attr.evolve(config, edge_paths=[train_out]),
        subprocess_init=subprocess_init,
    )

    eval_config = attr.evolve(
        config,
        edge_paths=[test_out],
        relations=[attr.evolve(rel, all_negs=True) for rel in config.relations],
        num_uniform_negs=0,
    )
    eval_kwargs = {'subprocess_init': subprocess_init}
    if opts.filtered:
        # The filtered evaluator is given all three edge sets.
        eval_kwargs['evaluator'] = FilteredRankingEvaluator(
            eval_config, [test_out, valid_out, train_out])
    do_eval(eval_config, **eval_kwargs)
Esempio n. 5
0
def main():
    """Convert, train and evaluate on FB15k files found under ``--data_dir``.

    Command-line options:
        --config       path to the config file
        -p / --param   config overrides; may be repeated, each occurrence
                       taking zero or more values
        --data_dir     directory holding the raw FB15k files
        --no-filtered  evaluate without FilteredRankingEvaluator

    This variant does not download anything; see the note in the body.
    """
    cli = argparse.ArgumentParser(description='Example on FB15k')
    cli.add_argument(
        '--config', default='./fb15k_config.py', help='Path to config file')
    cli.add_argument('-p', '--param', action='append', nargs='*')
    cli.add_argument(
        '--data_dir', default='../../../data',
        help='where to save processed data')
    cli.add_argument(
        '--no-filtered', dest='filtered', action='store_false',
        help='Run unfiltered eval')
    opts = cli.parse_args()

    # Every -p occurrence contributes its own list; flatten them into one
    # stream of overrides (or pass None when -p was never given).
    overrides = (
        chain.from_iterable(opts.param) if opts.param is not None else None
    )

    data_dir = opts.data_dir
    # NOTE: the download step (utils.download_url(FB15K_URL, data_dir)
    # followed by utils.extract_tar on the result) is disabled in this
    # version, so the files named in FILENAMES are read from data_dir as-is.

    def converted(split):
        # Apply convert_path to the raw file of one named split.
        # Presumably this yields the converted edge path -- confirm
        # against convert_path.
        return convert_path(os.path.join(data_dir, FILENAMES[split]))

    edge_paths = [os.path.join(data_dir, fname) for fname in FILENAMES.values()]
    print('edge_paths', edge_paths)
    convert_input_data(
        opts.config,
        edge_paths,
        lhs_col=0,
        rhs_col=2,
        rel_col=1,
    )

    config = parse_config(opts.config, overrides)

    train(attr.evolve(config, edge_paths=[converted('train')]))

    eval_config = attr.evolve(
        config,
        edge_paths=[converted('test')],
        relations=[attr.evolve(rel, all_negs=True) for rel in config.relations],
    )
    if not opts.filtered:
        do_eval(eval_config)
        return

    # The filtered evaluator is given all three edge sets.
    filter_paths = [converted('test'), converted('valid'), converted('train')]
    do_eval(eval_config, FilteredRankingEvaluator(eval_config, filter_paths))
Esempio n. 6
0
def main():
    """Run the LiveJournal example: download, split, convert, train, evaluate.

    Command-line options:
        --config      path to the config file (defaults to DEFAULT_CONFIG)
        -p / --param  config overrides; may be repeated, each occurrence
                      taking zero or more values
        --data_dir    directory for the downloaded/processed data
    """
    cli = argparse.ArgumentParser(description='Example on Livejournal')
    cli.add_argument(
        '--config', default=DEFAULT_CONFIG, help='Path to config file')
    cli.add_argument('-p', '--param', action='append', nargs='*')
    cli.add_argument(
        '--data_dir', default='data', help='where to save processed data')
    opts = cli.parse_args()

    # Every -p occurrence contributes its own list; flatten them into one
    # stream of overrides (or pass None when -p was never given).
    overrides = (
        chain.from_iterable(opts.param) if opts.param is not None else None
    )

    # Fetch and decompress the raw dataset.
    data_dir = opts.data_dir
    os.makedirs(data_dir, exist_ok=True)
    raw_file = utils.extract_gzip(utils.download_url(URL, data_dir))
    print('Downloaded and extracted file.')

    # Randomly split the raw file into train and test portions.
    random_split_file(raw_file)

    loader = ConfigFileLoader()
    config = loader.load_config(opts.config, overrides)
    edge_paths = [os.path.join(data_dir, fname) for fname in FILENAMES.values()]

    convert_input_data(
        config.entities,
        config.relations,
        config.entity_path,
        edge_paths,
        lhs_col=0,
        rhs_col=1,
        rel_col=None,
        dynamic_relations=config.dynamic_relations,
    )

    # Passed to both phases as subprocess_init; binds the config file's
    # directory as the argument of add_to_sys_path.
    init_subprocess = partial(add_to_sys_path, loader.config_dir.name)

    train_edges = convert_path(os.path.join(data_dir, FILENAMES['train']))
    train(
        attr.evolve(config, edge_paths=[train_edges]),
        subprocess_init=init_subprocess,
    )

    eval_edges = convert_path(os.path.join(data_dir, FILENAMES['test']))
    do_eval(
        attr.evolve(config, edge_paths=[eval_edges]),
        subprocess_init=init_subprocess,
    )
Esempio n. 7
0
def main():
    """Run the LiveJournal example: download, split, convert, train, evaluate.

    Command-line options:
        --config      path to the config file (defaults to DEFAULT_CONFIG)
        -p / --param  config overrides; may be repeated, each occurrence
                      taking zero or more values
        --data_dir    directory for the downloaded/processed data (a Path)
    """
    setup_logging()

    cli = argparse.ArgumentParser(description='Example on Livejournal')
    cli.add_argument(
        '--config', default=DEFAULT_CONFIG, help='Path to config file')
    cli.add_argument('-p', '--param', action='append', nargs='*')
    cli.add_argument(
        '--data_dir', type=Path, default='data',
        help='where to save processed data')
    opts = cli.parse_args()

    # Every -p occurrence contributes its own list; flatten them into one
    # stream of overrides (or pass None when -p was never given).
    overrides = (
        chain.from_iterable(opts.param) if opts.param is not None else None
    )

    # Fetch and decompress the raw dataset.
    data_dir = opts.data_dir
    data_dir.mkdir(parents=True, exist_ok=True)
    raw_file = extract_gzip(download_url(URL, data_dir))
    print('Downloaded and extracted file.')

    # Randomly split the raw file into train and test portions.
    random_split_file(raw_file)

    loader = ConfigFileLoader()
    config = loader.load_config(opts.config, overrides)
    set_logging_verbosity(config.verbose)

    # Callbacks registered here are handed to train/do_eval as
    # subprocess_init.
    subprocess_init = SubprocessInitializer()
    subprocess_init.register(setup_logging, config.verbose)
    subprocess_init.register(add_to_sys_path, loader.config_dir.name)

    edge_paths = [data_dir / fname for fname in FILENAMES.values()]
    convert_input_data(
        config.entities,
        config.relations,
        config.entity_path,
        edge_paths,
        lhs_col=0,
        rhs_col=1,
        rel_col=None,
        dynamic_relations=config.dynamic_relations,
    )

    # edge_paths in the config are given as plain strings, hence str().
    train_edges = str(convert_path(data_dir / FILENAMES['train']))
    train(
        attr.evolve(config, edge_paths=[train_edges]),
        subprocess_init=subprocess_init,
    )

    eval_edges = str(convert_path(data_dir / FILENAMES['test']))
    do_eval(
        attr.evolve(config, edge_paths=[eval_edges]),
        subprocess_init=subprocess_init,
    )
Esempio n. 8
0
def main():
    """Run the LiveJournal example: download, split, convert, train, evaluate.

    Command-line options:
        --config      path to the config file (defaults to DEFAULT_CONFIG)
        -p / --param  config overrides; passed to the loader as parsed
        --data_dir    directory for the downloaded/processed data (a Path)

    The config's ``edge_paths`` must contain exactly two entries, which are
    unpacked as train and test output paths.
    """
    setup_logging()

    cli = argparse.ArgumentParser(description='Example on Livejournal')
    cli.add_argument(
        '--config', default=DEFAULT_CONFIG, help='Path to config file')
    cli.add_argument('-p', '--param', action='append', nargs='*')
    cli.add_argument(
        '--data_dir', type=Path, default='data',
        help='where to save processed data')
    opts = cli.parse_args()

    # Fetch and decompress the raw dataset.
    data_dir = opts.data_dir
    data_dir.mkdir(parents=True, exist_ok=True)
    raw_file = extract_gzip(download_url(URL, data_dir))
    print('Downloaded and extracted file.')

    # Randomly split the raw file into train and test portions.
    random_split_file(raw_file)

    loader = ConfigFileLoader()
    config = loader.load_config(opts.config, opts.param)
    set_logging_verbosity(config.verbose)

    # Callbacks registered here are handed to train/do_eval as
    # subprocess_init.
    subprocess_init = SubprocessInitializer()
    subprocess_init.register(setup_logging, config.verbose)
    subprocess_init.register(add_to_sys_path, loader.config_dir.name)

    raw_edge_paths = [data_dir / fname for fname in FILENAMES]
    train_out, test_out = config.edge_paths

    convert_input_data(
        config.entities,
        config.relations,
        config.entity_path,
        config.edge_paths,
        raw_edge_paths,
        TSVEdgelistReader(lhs_col=0, rhs_col=1, rel_col=None),
        dynamic_relations=config.dynamic_relations,
    )

    train(
        attr.evolve(config, edge_paths=[train_out]),
        subprocess_init=subprocess_init,
    )
    do_eval(
        attr.evolve(config, edge_paths=[test_out]),
        subprocess_init=subprocess_init,
    )
Esempio n. 9
0
def run_train_eval():
    """Convert the input edges, train a model, then evaluate it.

    Reads the module-level names CONFIG_PATH, edge_paths, train_paths and
    eval_paths.
    """
    # Convert the data into partitioned files that PBG can read.
    convert_input_data(
        CONFIG_PATH, edge_paths, lhs_col=0, rhs_col=1, rel_col=None)

    # Parse the configuration.
    base_config = parse_config(CONFIG_PATH)

    # Training: replace the config's edge_paths with the already-partitioned
    # train_paths, then start training with that config.
    train(attr.evolve(base_config, edge_paths=train_paths))

    # Evaluation: replace the config's edge_paths with the already-partitioned
    # eval_paths, then start the evaluation.
    do_eval(attr.evolve(base_config, edge_paths=eval_paths))
Esempio n. 10
0
 def test_dynamic_relations(self):
     """Smoke-test training and evaluation with ``dynamic_relations=True``.

     The dataset is generated from a non-dynamic config holding ten copies
     of the relation; the relation count is then written to
     ``dynamic_rel_count.txt`` in the entity directory before training and
     evaluating with the dynamic config.
     """
     relation = RelationSchema(name="r", lhs="el", rhs="er")
     entity_schemas = {
         "el": EntitySchema(num_partitions=1),
         "er": EntitySchema(num_partitions=1),
     }
     template = ConfigSchema(
         dimension=10,
         relations=[relation],
         entities=entity_schemas,
         # Both path fields are placeholders until the dataset exists.
         entity_path=None,
         edge_paths=[],
         checkpoint_path=self.checkpoint_path.name,
         dynamic_relations=True,
         global_emb=False,  # Must be off for dynamic relations.
         workers=2,
     )
     gen_config = attr.evolve(
         template,
         relations=[relation] * 10,
         dynamic_relations=False,  # Must be off if more than 1 relation.
     )

     dataset = generate_dataset(
         gen_config, num_entities=100, fractions=[0.04, 0.02]
     )
     self.addCleanup(dataset.cleanup)
     entity_dir = dataset.entity_path.name

     # "x" mode: the file is created here and must not already exist.
     count_file = os.path.join(entity_dir, "dynamic_rel_count.txt")
     with open(count_file, "xt") as f:
         f.write(str(len(gen_config.relations)))

     train_config = attr.evolve(
         template,
         entity_path=entity_dir,
         edge_paths=[dataset.relation_paths[0].name],
     )
     eval_config = attr.evolve(
         template,
         relations=[attr.evolve(relation, all_negs=True)],
         entity_path=entity_dir,
         edge_paths=[dataset.relation_paths[1].name],
     )

     # Apart from the checkpoint assertion, the only expectation is that
     # neither phase raises.
     train(train_config, rank=0, subprocess_init=self.subprocess_init)
     self.assertCheckpointWritten(train_config, version=1)
     do_eval(eval_config, subprocess_init=self.subprocess_init)
Esempio n. 11
0
def run_train_eval():
    """Split the raw data, convert it, train a model, then evaluate it.

    Reads the module-level names DATA_PATH, CONFIG_PATH, edge_paths,
    train_path and eval_path.
    """
    random_split_file(DATA_PATH)

    convert_input_data(
        CONFIG_PATH, edge_paths, lhs_col=0, rhs_col=1, rel_col=None)

    # Training config: the parsed config with edge_paths set to train_path.
    training = attr.evolve(parse_config(CONFIG_PATH), edge_paths=train_path)
    train(training)

    # The evaluation config is derived from the training config, changing
    # only edge_paths.
    do_eval(attr.evolve(training, edge_paths=eval_path))
Esempio n. 12
0
def main():
    """Run the FB15k example: download, convert, train, then evaluate.

    Command-line options:
        --config       path to the config file (defaults to DEFAULT_CONFIG)
        -p / --param   config overrides; passed to the loader as parsed
        --data_dir     directory for the downloaded/processed data (a Path)
        --no-filtered  evaluate without FilteredRankingEvaluator

    The config's ``edge_paths`` must contain exactly three entries, which
    are unpacked as train, valid and test output paths.
    """
    setup_logging()

    cli = argparse.ArgumentParser(description="Example on FB15k")
    cli.add_argument(
        "--config", default=DEFAULT_CONFIG, help="Path to config file")
    cli.add_argument("-p", "--param", action="append", nargs="*")
    cli.add_argument(
        "--data_dir", type=Path, default="data",
        help="where to save processed data")
    cli.add_argument(
        "--no-filtered", dest="filtered", action="store_false",
        help="Run unfiltered eval")
    opts = cli.parse_args()

    # Fetch and unpack the raw dataset.
    data_dir = opts.data_dir
    archive_path = download_url(FB15K_URL, data_dir)
    extract_tar(archive_path)
    print("Downloaded and extracted file.")

    loader = ConfigFileLoader()
    config = loader.load_config(opts.config, opts.param)
    set_logging_verbosity(config.verbose)

    # Callbacks registered here are handed to train/do_eval as
    # subprocess_init.
    subprocess_init = SubprocessInitializer()
    subprocess_init.register(setup_logging, config.verbose)
    subprocess_init.register(add_to_sys_path, loader.config_dir.name)

    raw_edge_paths = [data_dir / fname for fname in FILENAMES]
    train_out, valid_out, test_out = config.edge_paths

    convert_input_data(
        config.entities,
        config.relations,
        config.entity_path,
        config.edge_paths,
        raw_edge_paths,
        TSVEdgelistReader(lhs_col=0, rhs_col=2, rel_col=1),
        dynamic_relations=config.dynamic_relations,
    )

    train(
        attr.evolve(config, edge_paths=[train_out]),
        subprocess_init=subprocess_init,
    )

    eval_config = attr.evolve(
        config,
        edge_paths=[test_out],
        relations=[attr.evolve(rel, all_negs=True) for rel in config.relations],
        num_uniform_negs=0,
    )
    eval_kwargs = {"subprocess_init": subprocess_init}
    if opts.filtered:
        # The filtered evaluator is given all three edge sets.
        eval_kwargs["evaluator"] = FilteredRankingEvaluator(
            eval_config, [test_out, valid_out, train_out])
    do_eval(eval_config, **eval_kwargs)
Esempio n. 13
0
# Train a model, evaluate it, then read the learned embeddings back.
#
# Relies on names defined outside this snippet: CONFIG_PATH, DATA_DIR,
# FILENAMES, convert_path and os.
import json

import attr
import h5py
from torchbiggraph.config import parse_config
from torchbiggraph.eval import do_eval
from torchbiggraph.train import train

# --- Training -------------------------------------------------------------
train_config = parse_config(CONFIG_PATH)

train_path = [convert_path(os.path.join(DATA_DIR, FILENAMES['train']))]
train_config = attr.evolve(train_config, edge_paths=train_path)

train(train_config)

# --- Evaluation -----------------------------------------------------------
# The evaluation config is the training config with only edge_paths changed.
eval_path = [convert_path(os.path.join(DATA_DIR, FILENAMES['test']))]
eval_config = attr.evolve(train_config, edge_paths=eval_path)

do_eval(eval_config)

# --- Embedding lookup -----------------------------------------------------
with open(os.path.join(DATA_DIR, "dictionary.json"), "rt") as tf:
    dictionary = json.load(tf)

# The position of an entity in the dictionary's "user_id" list is used as
# its row index in the embeddings dataset below.
user_id = "0"
offset = dictionary["entities"]["user_id"].index(user_id)
print("our offset for user_id ", user_id, " is: ", offset)

# NOTE(review): the checkpoint file name is hard-coded (partition 0,
# version 10); confirm it matches the checkpoint_path and the number of
# epochs in the config.
with h5py.File("model/example_2/embeddings_user_id_0.v10.h5", "r") as hf:
    embedding_user_0 = hf["embeddings"][offset, :]
    embedding_all = hf["embeddings"][:]
Esempio n. 14
0
def main():
    """Run the FB15k example: download, convert, train, then evaluate.

    Command-line options:
        --config       path to the config file (defaults to DEFAULT_CONFIG)
        -p / --param   config overrides; may be repeated, each occurrence
                       taking zero or more values
        --data_dir     directory for the downloaded/processed data
        --no-filtered  evaluate without FilteredRankingEvaluator
    """
    cli = argparse.ArgumentParser(description='Example on FB15k')
    cli.add_argument(
        '--config', default=DEFAULT_CONFIG, help='Path to config file')
    cli.add_argument('-p', '--param', action='append', nargs='*')
    cli.add_argument(
        '--data_dir', default='data', help='where to save processed data')
    cli.add_argument(
        '--no-filtered', dest='filtered', action='store_false',
        help='Run unfiltered eval')
    opts = cli.parse_args()

    # Every -p occurrence contributes its own list; flatten them into one
    # stream of overrides (or pass None when -p was never given).
    overrides = (
        chain.from_iterable(opts.param) if opts.param is not None else None
    )

    # Fetch and unpack the raw dataset.
    data_dir = opts.data_dir
    archive_path = utils.download_url(FB15K_URL, data_dir)
    utils.extract_tar(archive_path)
    print('Downloaded and extracted file.')

    loader = ConfigFileLoader()
    config = loader.load_config(opts.config, overrides)
    edge_paths = [os.path.join(data_dir, fname) for fname in FILENAMES.values()]

    convert_input_data(
        config.entities,
        config.relations,
        config.entity_path,
        edge_paths,
        lhs_col=0,
        rhs_col=2,
        rel_col=1,
        dynamic_relations=config.dynamic_relations,
    )

    def converted(split):
        # Apply convert_path to the raw file of one named split.
        # Presumably this yields the converted edge path -- confirm
        # against convert_path.
        return convert_path(os.path.join(data_dir, FILENAMES[split]))

    # Passed to both phases as subprocess_init; binds the config file's
    # directory as the argument of add_to_sys_path.
    init_subprocess = partial(add_to_sys_path, loader.config_dir.name)

    train(
        attr.evolve(config, edge_paths=[converted('train')]),
        subprocess_init=init_subprocess,
    )

    eval_config = attr.evolve(
        config,
        edge_paths=[converted('test')],
        relations=[attr.evolve(rel, all_negs=True) for rel in config.relations],
        num_uniform_negs=0,
    )
    eval_kwargs = {'subprocess_init': init_subprocess}
    if opts.filtered:
        # The filtered evaluator is given all three edge sets.
        filter_paths = [
            converted('test'), converted('valid'), converted('train')]
        eval_kwargs['evaluator'] = FilteredRankingEvaluator(
            eval_config, filter_paths)
    do_eval(eval_config, **eval_kwargs)