Exemple #1
0
def test_setup_parser_with_proto():
    """Check that the proto-derived parser exposes dotted field names.

    Parsing an empty argument list must still yield a namespace carrying an
    attribute for each of the expected ``ConstructConfig`` fields.
    """
    parsed = proto_util.setup_parser_with_proto(
        cpb.ConstructConfig()
    ).parse_args([])
    required_fields = (
        "cluster.address",
        "cluster.port",
        "ftp.address",
        "ftp.workdir",
    )
    for field_name in required_fields:
        assert hasattr(parsed, field_name)
Exemple #2
0
def test_transfer_args_to_proto():
    """Check that namespace values overwrite the matching proto fields.

    Fields named on the namespace must take the namespace's value, while a
    field that the namespace does not mention keeps its original value.
    """
    # The proto we expect to end up with after the transfer.
    expected = cpb.ConstructConfig()
    expected.cluster.address = "NEW_addr_val"
    expected.cluster.port = 4321
    expected.ftp.address = "unrelated"

    # Starting proto: two fields that will be replaced, one that will not.
    actual = cpb.ConstructConfig()
    actual.cluster.address = "original_addr_val"
    actual.cluster.port = 1234
    actual.ftp.address = "unrelated"

    # Dotted names are not valid identifiers, so they are attached via setattr.
    overrides = Namespace()
    for dotted_name, new_value in (
        ("cluster.address", "NEW_addr_val"),
        ("cluster.port", 4321),
    ):
        setattr(overrides, dotted_name, new_value)

    proto_util.transfer_args_to_proto(overrides, actual)

    assert actual == expected
Exemple #3
0
def test_parse_proto_fields_build_config():
    """Check that ``get_full_field_names`` reports the expected dotted names.

    The config may define more fields than the ones listed here; the test only
    requires that these names are among those reported.
    """
    required_names = {
        "cluster.address",
        "cluster.port",
        "ftp.address",
        "ftp.workdir",
    }
    reported_names = set(
        proto_util.get_full_field_names(cpb.ConstructConfig())
    )
    # Every required name must be present; extra reported names are allowed.
    assert required_names.issubset(reported_names)
Exemple #4
0
def setup_directories(config: "cpb.ConstructConfig") -> Dict[str, Path]:
    """Create the scratch and output directory trees and return their paths.

    Directory structure::

      {scratch_root_dir}/
        checkpoints/
          ...
        helper_data/
          faiss_index/
          hash_to_name/
          semrep/

      {output_dir}/
        json_dump/
          graph_data/
            ...
          sentence_data/
            ...

    Args:
      config: Build configuration. Only ``config.scratch_dir`` and
        ``config.output_dir`` are read.

    Returns:
      A dict mapping each name below to its ``Path``:
      ``scratch_root_dir``, ``checkpoint_dir``, ``helper_data_dir``,
      ``faiss_index_dir``, ``hash2name_dir``, ``semrep_work_dir``,
      ``output_dir``, ``output_dump_dir``, ``output_graph_dir``,
      ``output_sentence_dir`` (directories, all created on disk), plus
      ``faiss_index_path`` and ``hash2name_db`` (file paths, NOT created).
      Every value is a ``Path``; the config itself is not included.
    """
    # Intermediate (scratch) directories.
    scratch_root_dir = Path(config.scratch_dir)
    helper_data_dir = scratch_root_dir / "helper_data"

    # Final output directories.
    output_dir = Path(config.output_dir)
    output_dump_dir = output_dir / "json_dump"

    # Listed explicitly rather than harvested from locals(), so that only
    # intended directories are created and returned.
    paths = {
        "scratch_root_dir": scratch_root_dir,
        "checkpoint_dir": scratch_root_dir / "checkpoints",
        "helper_data_dir": helper_data_dir,
        "faiss_index_dir": helper_data_dir / "faiss_index",
        "hash2name_dir": helper_data_dir / "hash_to_name",
        "semrep_work_dir": helper_data_dir / "semrep",
        "output_dir": output_dir,
        "output_dump_dir": output_dump_dir,
        "output_graph_dir": output_dump_dir / "graph_data",
        "output_sentence_dir": output_dump_dir / "sentence_data",
    }

    # At this point `paths` holds directories only; create each of them.
    for directory in paths.values():
        directory.mkdir(parents=True, exist_ok=True)

    # Helper file paths are added after the mkdir pass so that they are not
    # themselves created as directories.
    paths["faiss_index_path"] = paths["faiss_index_dir"] / "final.index"
    paths["hash2name_db"] = paths["hash2name_dir"] / "hash2name.sqlite3"

    return paths
Exemple #5
0
def test_set_field_nested():
    """Check that ``set_field`` assigns a nested field given its dotted name."""
    # Assign through the helper under test.
    modified = cpb.ConstructConfig()
    proto_util.set_field(modified, "cluster.address", "new_addr_val")

    # Assign the same field directly for comparison.
    reference = cpb.ConstructConfig()
    reference.cluster.address = "new_addr_val"

    assert modified == reference
Exemple #6
0
    dpg.add_global_preloader(client=dask_client, preloader=preloader)


def setup_checkpoints(config: cpb.ConstructConfig) -> None:
    """Configure the ``checkpoint`` module from the build config.

    The checkpoint root is set first; checkpoints are then optionally
    disabled, given a halt point, and cleared, in that order.

    Args:
      config: Build configuration. Reads ``cluster.disable_checkpoints``,
        ``cluster.clear_checkpoints`` and, when set, ``stop_after_ckpt``.
    """
    # NOTE: `checkpoint_dir` is not defined in this function. It is read from
    # the enclosing scope; the script's main block injects it with
    # `locals().update(setup_directories(config))` before calling this.
    checkpoint.set_root(checkpoint_dir)

    cluster_opts = config.cluster
    if cluster_opts.disable_checkpoints:
        checkpoint.disable()
    # Only set a halt point when the field was explicitly provided.
    if config.HasField("stop_after_ckpt"):
        checkpoint.set_halt_point(config.stop_after_ckpt)
    if cluster_opts.clear_checkpoints:
        checkpoint.clear_all_ckpt()


if __name__ == "__main__":
    config = cpb.ConstructConfig()
    # Creates a parser with arguments corresponding to all of the provided fields.
    # Copy any command-line specified args to the config
    proto_util.parse_args_to_config_proto(config)
    print("Running agatha build with the following custom parameters:")
    print(config)

    # Adds all setup directories to the current scope
    locals().update(setup_directories(config))
    setup_cluster(config, faiss_index_path)
    setup_checkpoints(config)

    ##############################################################################
    # BEGIN PIPELINE                                                             #
    ##############################################################################