Example #1
0
def test_item_local_abspath_with_clean_cache(tmp_uuid_and_uri):  # NOQA
    """Item content can be fetched into a brand-new cache directory."""
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier

    uuid, dest_uri = tmp_uuid_and_uri

    # Admin metadata pinned to the fixture's UUID.
    admin_metadata = generate_admin_metadata("my_dataset")
    admin_metadata["uuid"] = uuid

    local_file_path = os.path.join(
        os.path.join(TEST_SAMPLE_DATA), "tiny.png"
    )

    # Build and freeze a one-item dataset.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=CONFIG_PATH,
    )
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, "tiny.png")
    proto_dataset.freeze()

    # Load the frozen dataset once outside the clean-cache context.
    DataSet.from_uri(dest_uri, config_path=CONFIG_PATH)

    identifier = generate_identifier("tiny.png")

    with tmp_directory() as cache_dir:
        with tmp_env_var("DTOOL_AZURE_CACHE_DIRECTORY", cache_dir):
            # Re-open with an empty cache; fetching content must still work.
            dataset = DataSet.from_uri(dest_uri, config_path=CONFIG_PATH)
            fpath = dataset.item_content_abspath(identifier)
            assert os.path.isfile(fpath)
Example #2
0
def test_diff_sizes(tmp_uri_fixture):  # NOQA
    """diff_sizes reports items whose sizes differ between two datasets."""
    from dtoolcore import (
        DataSet,
        generate_admin_metadata,
        generate_proto_dataset,
    )
    from dtoolcore.utils import generate_identifier
    from dtoolcore.compare import diff_sizes

    fpaths = create_test_files(tmp_uri_fixture)

    def _frozen_uri(name, fpath):
        # Build a single-item frozen dataset and return its URI.
        proto = generate_proto_dataset(
            admin_metadata=generate_admin_metadata(name),
            base_uri=tmp_uri_fixture)
        proto.create()
        proto.put_item(fpath, "file.txt")
        proto.freeze()
        return proto.uri

    ds_a = DataSet.from_uri(_frozen_uri("test_compare_1", fpaths["he"]))
    ds_b = DataSet.from_uri(_frozen_uri("test_compare_2", fpaths["she"]))

    # A dataset compared with itself has no size differences.
    assert diff_sizes(ds_a, ds_a) == []

    # Expected sizes: 2 bytes vs 3 bytes for the differing item.
    expected = [
        (generate_identifier("file.txt"), 2, 3),
    ]
    assert diff_sizes(ds_a, ds_b) == expected
Example #3
0
def test_http_enable_with_presigned_url(tmp_uuid_and_uri):  # NOQA
    """http_enable with a publish expiry yields presigned URLs throughout.

    Builds a minimal frozen dataset, enables HTTP access while
    DTOOL_S3_PUBLISH_EXPIRY is set, then checks that the dataset read back
    over the access URL matches the original and that every manifest URL
    carries a query string (the presigned-URL marker).
    """

    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.put_readme("---\nproject: testing\n")
    proto_dataset.freeze()

    dataset = DataSet.from_uri(dest_uri)

    # Add an annotation.
    dataset.put_annotation("project", "dtool-testing")

    # Add tags.
    dataset.put_tag("amazing")
    dataset.put_tag("stuff")

    # Setting an expiry triggers presigned-URL generation.
    with tmp_env_var("DTOOL_S3_PUBLISH_EXPIRY", "120"):
        access_url = dataset._storage_broker.http_enable()
    assert access_url.find("?") != -1  # This is a presigned URL dataset.

    assert access_url.startswith("https://")

    dataset_from_http = DataSet.from_uri(access_url)

    # Assert that the annotation has been copied across.
    assert dataset_from_http.get_annotation("project") == "dtool-testing"

    # Assert that the tags are available.
    assert dataset_from_http.list_tags() == ["amazing", "stuff"]

    from dtoolcore.compare import (diff_identifiers, diff_sizes, diff_content)

    # The HTTP view of the dataset must be identical to the original.
    assert len(diff_identifiers(dataset, dataset_from_http)) == 0
    assert len(diff_sizes(dataset, dataset_from_http)) == 0
    assert len(diff_content(dataset_from_http, dataset)) == 0

    # Make sure that all the URLs in the manifest are presigned.
    http_manifest = dataset_from_http._storage_broker.http_manifest
    assert http_manifest["manifest_url"].find("?") != -1
    assert http_manifest["readme_url"].find("?") != -1
    for url in http_manifest["item_urls"].values():
        assert url.find("?") != -1
    for url in http_manifest["annotations"].values():
        assert url.find("?") != -1
def main():
    """Separate plots for one item and stage the results into a dataset."""
    parser = argparse.ArgumentParser()
    for flag in ('--dataset-uri', '--resource-uri',
                 '--identifier', '--output-uri'):
        parser.add_argument(flag)
    args = parser.parse_args()

    dataset = DataSet.from_uri(args.dataset_uri)
    resource_dataset = DataSet.from_uri(args.resource_uri)
    output_dataset = ProtoDataSet.from_uri(args.output_uri)

    with temp_working_dir() as working_dir:
        # Run the analysis in a scratch directory, then copy the results
        # (plus the selected overlays) into the output dataset.
        outputs = separate_plots(
            dataset,
            args.identifier,
            resource_dataset,
            working_dir,
        )
        stage_outputs(
            outputs,
            working_dir,
            dataset,
            output_dataset,
            ['ordering', 'date'],
            args.identifier,
        )
Example #5
0
def test_windows_abspath_uri(tmp_dir_fixture):  # NOQA
    """DataSet.from_uri copes with absolute file URIs on all platforms."""
    from dtoolcore import DataSet, DataSetCreator
    from dtoolcore.utils import IS_WINDOWS

    with DataSetCreator("tmp_ds", tmp_dir_fixture):
        pass

    abspath = os.path.abspath(os.path.join(tmp_dir_fixture, "tmp_ds"))
    if IS_WINDOWS:
        # Example Win URI: file:///C:/some/path/to/ds.
        # The drive letter ("C:") is part of the path, the URI needs a
        # third slash, and separators must be forward slashes.
        uri = "file:///" + abspath.replace("\\", "/")
    else:
        uri = "file://" + abspath

    DataSet.from_uri(uri)
Example #6
0
def main(dataset_uri):
    """Display dataset images with their mask overlays in a vispy scene.

    Populates module-level ``display`` state, wires visuals into the
    global ``view``, and starts the vispy event loop.
    """

    dataset = DataSet.from_uri(dataset_uri)

    # Iterator over identifiers flagged by the "is_image" overlay.
    display.im_ids = iter(identifiers_where_overlay_is_true(dataset, "is_image"))

    imid = next(display.im_ids)

    # Overlay mapping each image identifier to its mask item identifier.
    display.mask_overlay = dataset.get_overlay("mask_ids")
    im = imread(dataset.item_content_abspath(imid))
    mask_im = imread(dataset.item_content_abspath(display.mask_overlay[imid]))
    display.dataset = dataset
    display.image = scene.visuals.Image(im, parent=view.scene)
    display.mask_image = scene.visuals.Image(mask_im, parent=view.scene)
    # Mask starts hidden; presumably toggled by an event handler elsewhere.
    display.mask_image.visible = False

    # Label the image with the current item's relpath.
    textstr = display.dataset.item_properties(imid)['relpath']
    t1 = scene.visuals.Text(textstr, parent=display.image, color='red', pos=(30,5))
    t1.font_size = 24
    display.t1 = t1

    view.camera = scene.PanZoomCamera(aspect=1)
    view.camera.set_range()
    # NOTE(review): flips the y axis — presumably so image rows render
    # top-down; confirm against the display conventions used elsewhere.
    view.camera.flip = (False, True, False)

    app.run()
def test_iter_datasets_in_base_uri(tmp_uri_fixture):  # NOQA
    """Frozen and proto datasets are yielded by their respective iterators."""
    from dtoolcore import (
        create_proto_dataset,
        DataSet,
        ProtoDataSet,
        iter_datasets_in_base_uri,
        iter_proto_datasets_in_base_uri,
    )

    # Create a proto dataset.
    proto_ds = create_proto_dataset("proto", tmp_uri_fixture)

    # Create a frozen dataset.
    _frozen_ds = create_proto_dataset("frozen", tmp_uri_fixture)
    _frozen_ds.freeze()
    frozen_ds = DataSet.from_uri(_frozen_ds.uri)

    from_iter_datasets = list(iter_datasets_in_base_uri(tmp_uri_fixture))
    from_iter_proto_datasets = list(
        iter_proto_datasets_in_base_uri(tmp_uri_fixture))

    # Each iterator yields exactly the one matching dataset.
    # (Previously the second assertion re-checked from_iter_datasets,
    # leaving the proto iterator's length untested.)
    assert len(from_iter_datasets) == 1
    assert len(from_iter_proto_datasets) == 1

    assert isinstance(from_iter_datasets[0], DataSet)
    assert isinstance(from_iter_proto_datasets[0], ProtoDataSet)

    assert proto_ds.uri == from_iter_proto_datasets[0].uri
    assert frozen_ds.uri == from_iter_datasets[0].uri
Example #8
0
def test_basic_workflow(tmp_dir_fixture):  # NOQA
    """Create, freeze and re-read a one-item dataset on disk."""
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture,
    )

    local_file_path = os.path.join(
        os.path.join(TEST_SAMPLE_DATA), "tiny.png"
    )

    # Build a proto dataset containing a single item, then freeze it.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None,
    )
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, "tiny.png")
    proto_dataset.freeze()

    # The frozen dataset exposes exactly the one expected identifier.
    dataset = DataSet.from_uri(dest_uri)
    assert generate_identifier("tiny.png") in dataset.identifiers
    assert len(dataset.identifiers) == 1
Example #9
0
def test_dataset_freeze_functional(chdir_fixture):  # NOQA
    """The create/add/freeze CLI round trip yields a one-item dataset."""
    from dtool_create.dataset import create, freeze, add

    runner = CliRunner()
    dataset_name = "my_dataset"

    assert runner.invoke(create, [dataset_name]).exit_code == 0

    # At this point we have a proto dataset.
    dataset_uri = sanitise_uri(os.path.abspath(dataset_name))
    ProtoDataSet.from_uri(dataset_uri)

    # Create a sample file and add it to the proto dataset.
    sample_file_name = "hello.txt"
    with open(sample_file_name, "w") as fh:
        fh.write("hello world")
    add_result = runner.invoke(add, ["item", sample_file_name, dataset_uri])
    assert add_result.exit_code == 0

    assert runner.invoke(freeze, [dataset_uri]).exit_code == 0

    # Now we have a frozen dataset whose manifest lists the item.
    dataset = DataSet.from_uri(dataset_uri)
    assert len(dataset.identifiers) == 1
def test_basic_workflow_on_first_namespace(tmp_uuid_and_uri):  # NOQA
    """Create, freeze and re-read a one-item dataset at the fixture URI."""
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier

    uuid, dest_uri = tmp_uuid_and_uri

    admin_metadata = generate_admin_metadata("my_dataset")
    admin_metadata["uuid"] = uuid

    local_file_path = os.path.join(
        os.path.join(TEST_SAMPLE_DATA), "tiny.png"
    )

    # Build a proto dataset with a single item and freeze it.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None,
    )
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, "tiny.png")
    proto_dataset.freeze()

    # Re-read and verify that exactly the expected item is present.
    dataset = DataSet.from_uri(dest_uri)
    assert generate_identifier("tiny.png") in dataset.identifiers
    assert len(dataset.identifiers) == 1
Example #11
0
def analyse_dataset(dataset_dir, output_dir):
    """Analyse every item in the dataset at *dataset_dir*."""
    dataset = DataSet.from_uri(dataset_dir)
    logging.info("Analysing items in dataset: {}".format(dataset.name))

    # Delegate the per-item work to analyse_item.
    for identifier in dataset.identifiers:
        analyse_item(dataset_dir, output_dir, identifier)
def cli(dataset_uri):
    """Create the illumina metadata, read1 and pair-id overlays."""
    dataset = DataSet.from_uri(dataset_uri)

    # Apply each overlay builder in turn.
    for build_overlay in (
        create_illumina_metadata_overlay,
        create_read1_overlay,
        create_pair_id_overlay,
    ):
        build_overlay(dataset)
def test_dataset_summary_json_functional():
    """`summary --format json` output matches values computed via the API."""
    from dtoolcore import DataSet
    from dtool_info.dataset import summary

    # Compute the expected summary directly from the dataset.
    lion_ds = DataSet.from_uri(lion_dataset_uri)
    tot_size = sum(
        lion_ds.item_properties(i)["size_in_bytes"]
        for i in lion_ds.identifiers
    )
    expected = {
        "name": lion_ds.name,
        "uuid": lion_ds.uuid,
        "number_of_items": len(lion_ds.identifiers),
        "size_in_bytes": tot_size,
        "creator_username": lion_ds._admin_metadata["creator_username"],
        "frozen_at": lion_ds._admin_metadata["frozen_at"],
    }

    result = CliRunner().invoke(
        summary, ["--format", "json", lion_dataset_uri])
    assert result.exit_code == 0
    assert json.loads(result.output) == expected
Example #14
0
def main(dataset_path):
    """Build a master CSV from all items flagged as CSV in the dataset."""
    import pprint

    dataset = DataSet.from_uri(dataset_path)

    create_is_csv_overlay(dataset)

    def info_from_identifier(identifier):
        # Relpaths are expected to look like "<label1>/<compound>/<file>"
        # — TODO confirm against the datasets this is run on.
        relpath = dataset.item_properties(identifier)['relpath']

        print(relpath)

        label1, compound, _ = relpath.split('/')

        try:
            label2, label3 = compound.rsplit('-', 1)
        except ValueError:
            # Compound name without a "-" suffix; fall back to placeholders.
            label2, label3 = "?", "?"

        return label1, label2, label3

    # Pair each CSV item's local path with the labels parsed from its relpath.
    fpaths_and_extra_data = [
        (dataset.item_content_abspath(identifier),
         info_from_identifier(identifier))
        for identifier in identifiers_where_overlay_is_true(dataset, "is_csv")
    ]

    pprint.pprint(fpaths_and_extra_data)

    build_master_csv(fpaths_and_extra_data, 'all_cells.csv')
def test_uri_property_when_using_relpath(chdir_fixture):  # NOQA
    """A dataset opened via a relative path reports an absolute file URI."""
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        prefix=".",
    )

    local_file_path = os.path.join(
        os.path.join(TEST_SAMPLE_DATA), "tiny.png"
    )

    # Build and freeze a minimal dataset in the current directory.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None,
    )
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, "tiny.png")
    proto_dataset.freeze()

    # Opening via a relative path should normalise to an absolute URI.
    dataset = DataSet.from_uri("./my_dataset")
    assert dataset.uri == "file://" + os.path.abspath("my_dataset")
Example #16
0
def main(dataset_uri):
    """Run the interactive image-tagging vispy app over a dataset.

    Populates module-level ``app`` state, wires visuals into the global
    ``view``, and starts the event loop.
    """

    dataset = DataSet.from_uri(dataset_uri)
    app.dataset = dataset
    # Accumulates tags assigned during the session, presumably by event
    # handlers defined elsewhere.
    app.tags = {}

    app.image_generator = dataset_image_generator(dataset)

    # Show the first image from the generator.
    im, app.current_id = next(app.image_generator)
    app.image = scene.visuals.Image(im, parent=view.scene)

    # On-screen counter label, anchored to the image.
    app.counter = 0
    textstr = "Image {}".format(app.counter)
    t1 = scene.visuals.Text(textstr,
                            parent=app.image,
                            color='red',
                            pos=(30, 5))
    t1.font_size = 24
    app.t1 = t1

    view.camera = scene.PanZoomCamera(aspect=1)
    view.camera.set_range()
    # NOTE(review): flips the y axis — presumably so image rows render
    # top-down; confirm against the display conventions used elsewhere.
    view.camera.flip = (False, True, False)

    app.run()
Example #17
0
def test_annotations(tmp_uuid_and_uri):  # NOQA
    """Annotations can be added to and listed on a frozen dataset."""
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    uuid, dest_uri = tmp_uuid_and_uri

    admin_metadata = generate_admin_metadata("my_dataset")
    admin_metadata["uuid"] = uuid

    local_file_path = os.path.join(
        os.path.join(TEST_SAMPLE_DATA), "tiny.png"
    )

    # Build and freeze a minimal dataset.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None,
    )
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, "tiny.png")
    proto_dataset.freeze()

    dataset = DataSet.from_uri(dest_uri)

    # A fresh dataset has no annotations.
    assert dataset.list_annotation_names() == []

    # Round-trip a single annotation.
    dataset.put_annotation("project", "demo")
    assert dataset.get_annotation("project") == "demo"
    assert dataset.list_annotation_names() == ["project"]
Example #18
0
    def __init__(self):
        """Parse CLI arguments and open the input and output datasets."""
        parser = argparse.ArgumentParser()
        parser.add_argument(
            '-d', '--dataset', help='URI of input dataset')
        parser.add_argument(
            '-i', '--identifier', help='Identifier (hash) to process')
        parser.add_argument(
            '-o', '--output-dataset', help='URI of output dataset')

        args = parser.parse_args()

        # Input must be frozen; output is still writable (proto).
        self.input_dataset = DataSet.from_uri(args.dataset)
        self.output_dataset = ProtoDataSet.from_uri(args.output_dataset)
        self.identifier = args.identifier
def main():
    """Segment a single item and stage the outputs into a dataset."""
    parser = argparse.ArgumentParser()
    for flag, help_text in (
        ('--dataset-uri', 'Dataset URI'),
        ('--identifier', 'Identifier (hash) to process'),
        ('--output-uri', 'Output dataset uri'),
    ):
        parser.add_argument(flag, help=help_text)
    args = parser.parse_args()

    dataset = DataSet.from_uri(args.dataset_uri)
    output_dataset = ProtoDataSet.from_uri(args.output_uri)

    with temp_working_dir() as working_dir:
        # Run segmentation in a scratch directory, then copy the results
        # (plus the selected overlays) into the output dataset.
        outputs = segment_single_identifier(
            dataset,
            args.identifier,
            working_dir,
        )
        stage_outputs(
            outputs,
            working_dir,
            dataset,
            output_dataset,
            ['coords', 'ordering', 'useful_name'],
            args.identifier,
        )
    def __init__(self):
        """Parse CLI arguments and open the input and output datasets."""
        parser = argparse.ArgumentParser()
        parser.add_argument(
            '-d', '--dataset-uri', help='URI of input dataset')
        parser.add_argument(
            '-i', '--identifier', help='Identifier to process')
        parser.add_argument(
            '-o', '--output-dataset-uri', help='URI of output dataset')

        args = parser.parse_args()

        # Input must be frozen; output is still writable (proto).
        self.input_dataset = DataSet.from_uri(args.dataset_uri)
        self.output_dataset = ProtoDataSet.from_uri(args.output_dataset_uri)
        self.identifier = args.identifier
Example #21
0
 def __init__(self, uri):
     """Load the dataset at *uri* and initialise tagging state."""
     self.dataset = DataSet.from_uri(uri)
     # One 'Untagged' entry per item, keyed by position index (0..n-1),
     # not by dataset identifier.
     self.tags = {
         n: 'Untagged'
         for n in list(range(len(self.dataset.identifiers)))
     }
     # Timing bookkeeping — presumably filled in by methods elsewhere.
     self.load_times = {}
     self.tag_times = {}
Example #22
0
def test_prefix_functional():  # NOQA
    """DTOOL_S3_DATASET_PREFIX controls the dataset structure key prefix."""
    from dtoolcore import DataSetCreator
    from dtoolcore import DataSet, iter_datasets_in_base_uri

    def _create_annotated_dataset(prefix_value, annotation):
        # Create a minimal dataset while DTOOL_S3_DATASET_PREFIX is set,
        # then open it as a frozen dataset after the creator exits.
        with tmp_env_var("DTOOL_S3_DATASET_PREFIX", prefix_value):
            with DataSetCreator("no-prefix", S3_TEST_BASE_URI) as ds_creator:
                ds_creator.put_annotation("prefix", annotation)
                uri = ds_creator.uri
        return DataSet.from_uri(uri)

    # Empty prefix: structure key starts with the dataset UUID.
    dataset_no_prefix = _create_annotated_dataset("", "no")
    assert dataset_no_prefix.get_annotation("prefix") == "no"
    structure_key = dataset_no_prefix._storage_broker.get_structure_key()
    assert structure_key.startswith(dataset_no_prefix.uuid)

    # Explicit prefix: structure key starts with that prefix.
    prefix = "u/olssont/"
    dataset_with_prefix = _create_annotated_dataset(prefix, "yes")
    assert dataset_with_prefix.get_annotation("prefix") == "yes"
    structure_key = dataset_with_prefix._storage_broker.get_structure_key()
    assert structure_key.startswith(prefix)

    # Both datasets should be discoverable in the base URI.
    dataset_uris = [
        ds.uri for ds in iter_datasets_in_base_uri(S3_TEST_BASE_URI)
    ]
    assert dataset_no_prefix.uri in dataset_uris
    assert dataset_with_prefix.uri in dataset_uris

    _remove_dataset(dataset_no_prefix.uri)
    _remove_dataset(dataset_with_prefix.uri)
def main():
    """Label the plots in the dataset given on the command line."""
    parser = argparse.ArgumentParser()
    for name, help_text in (
        ('dataset_path', 'Path to dataset'),
        ('output_path', 'Output directory'),
    ):
        parser.add_argument(name, help=help_text)
    args = parser.parse_args()

    label_plots(DataSet.from_path(args.dataset_path))
Example #24
0
def main(input_dataset_uri, output_dspath, param_json):
    """Train on the input dataset and save results to a derived dataset."""
    # Split the output path into the base URI and the new dataset name.
    output_base_uri, output_name = os.path.split(output_dspath)

    input_ds = DataSet.from_uri(input_dataset_uri)
    params = Parameters.from_json_string(param_json)

    with DerivedDataSet(output_base_uri, output_name, input_ds) as output_ds:
        train_and_save_results(input_ds, output_ds, params)
def main():
    """Show tag locations for the dataset given on the command line."""
    parser = argparse.ArgumentParser()
    for name, help_text in (
        ('dataset_path', 'Path to dataset'),
        ('output_path', 'Output directory'),
    ):
        parser.add_argument(name, help=help_text)
    args = parser.parse_args()

    show_tag_locations(DataSet.from_path(args.dataset_path), args.output_path)
def analyse_dataset(dataset_dir, output_dir):
    """Analyse every file in the dataset at *dataset_dir*."""
    dataset = DataSet.from_path(dataset_dir)
    logging.info("Analysing items in dataset: {}".format(dataset.name))

    for identifier in dataset.identifiers:
        item_abspath = dataset.abspath_from_identifier(identifier)
        item_info = dataset.item_from_identifier(identifier)

        # Mirror each item's path under the output directory.
        specific_output_dir = item_output_path(output_dir, item_info["path"])
        analyse_file(item_abspath, specific_output_dir)
Example #27
0
def test_diff_content(tmp_dir_fixture):  # NOQA
    """diff_content reports items whose hashes differ between datasets."""
    from dtoolcore import (
        DataSet,
        generate_admin_metadata,
        generate_proto_dataset,
    )
    from dtoolcore.utils import generate_identifier
    from dtoolcore.compare import diff_content
    from dtoolcore.storagebroker import DiskStorageBroker

    fpaths = create_test_files(tmp_dir_fixture)

    def _frozen_uri(name, fpath):
        # Build a single-item frozen dataset on disk and return its URI.
        proto = generate_proto_dataset(
            admin_metadata=generate_admin_metadata(name),
            prefix=tmp_dir_fixture,
            storage="file")
        proto.create()
        proto.put_item(fpath, "file.txt")
        proto.freeze()
        return proto.uri

    ds_a = DataSet.from_uri(_frozen_uri("test_compare_1", fpaths["cat"]))
    ds_b = DataSet.from_uri(_frozen_uri("test_compare_2", fpaths["she"]))

    # A dataset compared with itself shows no content differences.
    assert diff_content(ds_a, ds_a) == []

    # The differing item is reported with both sides' content hashes.
    identifier = generate_identifier("file.txt")
    expected = [
        (identifier,
         DiskStorageBroker.hasher(ds_a.item_content_abspath(identifier)),
         DiskStorageBroker.hasher(ds_b.item_content_abspath(identifier)))
    ]
    assert diff_content(ds_a, ds_b) == expected
Example #28
0
def test_http_enable(tmp_uuid_and_uri):  # NOQA
    """An http-enabled dataset can be read back over its access URL."""
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.compare import (
        diff_identifiers,
        diff_sizes,
        diff_content,
    )

    uuid, dest_uri = tmp_uuid_and_uri

    admin_metadata = generate_admin_metadata("my_dataset")
    admin_metadata["uuid"] = uuid

    local_file_path = os.path.join(
        os.path.join(TEST_SAMPLE_DATA), "tiny.png"
    )

    # Build and freeze a minimal dataset with a readme.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None,
    )
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, "tiny.png")
    proto_dataset.put_readme("---\nproject: testing\n")
    proto_dataset.freeze()

    dataset = DataSet.from_uri(dest_uri)

    access_url = dataset._storage_broker.http_enable()
    assert access_url.startswith("https://")

    # The dataset served over HTTP must match the original exactly.
    dataset_from_http = DataSet.from_uri(access_url)
    assert len(diff_identifiers(dataset, dataset_from_http)) == 0
    assert len(diff_sizes(dataset, dataset_from_http)) == 0
    assert len(diff_content(dataset_from_http, dataset)) == 0
Example #29
0
def tmp_dataset_fixture(request):
    """Return a DataSet backed by a temp copy of the sample dataset."""
    from dtoolcore import DataSet

    staging_dir = tempfile.mkdtemp()
    dataset_path = os.path.join(staging_dir, 'sample_data')
    shutil.copytree(TEST_SAMPLE_DATASET, dataset_path)

    @request.addfinalizer
    def teardown():
        # Remove the whole staging area once the test is done.
        shutil.rmtree(staging_dir)

    return DataSet.from_path(dataset_path)
def main():
    """Explore a dataset, writing output under the given directory."""
    parser = argparse.ArgumentParser()
    for name, help_text in (
        ('dataset_path', 'Path to dataset'),
        ('output_path', 'Output directory'),
    ):
        parser.add_argument(name, help=help_text)
    args = parser.parse_args()

    # Auto-named artefacts are written to the output directory.
    AutoName.directory = args.output_path

    dataset = DataSet.from_path(args.dataset_path)
    explore_dataset(dataset, args.output_path, n=1)
Example #31
0
def test_dataset_ls_functional():
    """`ls` lists the lion dataset's name for the sample datasets dir."""
    from dtoolcore import DataSet
    from dtool_info.dataset import ls

    # The name we expect to appear in the listing.
    expected_name = DataSet.from_uri(lion_dataset_uri).name

    result = CliRunner().invoke(ls, [SAMPLE_DATASETS_DIR])
    assert result.exit_code == 0
    assert result.output.find(expected_name) != -1
Example #32
0
def test_tags_functional(tmp_uuid_and_uri):  # NOQA
    """Tags: put/list/delete, idempotence and validation rules.

    Covers tagging a proto dataset before freezing, listing (sorted),
    deletion, idempotent puts, type and name validation, and that deleting
    a missing tag is a no-op.
    """

    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()

    # Test put_tag on proto dataset.
    proto_dataset.put_tag("testing")

    proto_dataset.freeze()

    # Tags put before freezing survive into the frozen dataset.
    dataset = DataSet.from_uri(proto_dataset.uri)
    assert dataset.list_tags() == ["testing"]

    # list_tags returns tags in sorted order.
    dataset.put_tag("amazing")
    dataset.put_tag("stuff")
    assert dataset.list_tags() == ["amazing", "stuff", "testing"]

    dataset.delete_tag("stuff")
    assert dataset.list_tags() == ["amazing", "testing"]

    # Putting the same tag is idempotent.
    dataset.put_tag("amazing")
    dataset.put_tag("amazing")
    dataset.put_tag("amazing")
    assert dataset.list_tags() == ["amazing", "testing"]

    # Tags can only be strings.
    from dtoolcore import DtoolCoreValueError
    with pytest.raises(DtoolCoreValueError):
        dataset.put_tag(1)

    # Tags need to adhere to the utils.name_is_valid() rules.
    from dtoolcore import DtoolCoreInvalidNameError
    with pytest.raises(DtoolCoreInvalidNameError):
        dataset.put_tag("!invalid")

    # Deleting a non-existing tag does not raise. It silently succeeds.
    dataset.delete_tag("dontexist")
Example #33
0
def test_dataset_uuid_functional():
    """`uuid` prints the dataset's UUID."""
    from dtoolcore import DataSet
    from dtool_info.dataset import uuid

    # Expected value read directly via the API.
    expected_uuid = DataSet.from_uri(lion_dataset_uri).uuid

    result = CliRunner().invoke(uuid, [lion_dataset_uri])
    assert result.exit_code == 0
    assert result.output.strip() == expected_uuid
def analyse_dataset(dataset_dir, output_dir):
    """Analyse every file in the dataset at *dataset_dir*."""
    dataset = DataSet.from_uri(dataset_dir)
    logging.info("Analysing items in dataset: {}".format(dataset.name))

    for identifier in dataset.identifiers:
        item_abspath = dataset.item_content_abspath(identifier)
        item_info = dataset.item_properties(identifier)

        # Mirror each item's relpath under the output directory.
        specific_output_dir = item_output_path(
            output_dir, item_info["relpath"])
        analyse_file(item_abspath, specific_output_dir)
Example #35
0
def analyse_dataset(dataset_dir, output_dir, test_data_only=False):
    """Analyse every file in the dataset at *dataset_dir*."""
    dataset = DataSet.from_path(dataset_dir)
    logging.info("Analysing files in dataset: {}".format(dataset.name))

    for identifier in dataset.identifiers:
        item_abspath = dataset.abspath_from_identifier(identifier)
        item_info = dataset.item_from_identifier(identifier)

        # Mirror each item's path under the output directory.
        specific_output_dir = data_item_directory(
            output_dir, item_info["path"])
        analyse_file(item_abspath, specific_output_dir, test_data_only)
        if test_data_only:
            # Smoke-test mode: stop after the first item.
            break
Example #36
0
def test_basic_workflow_with_nested_handle(tmp_dir_fixture):  # NOQA
    """Items with nested handles (subdir/...) work on disk storage.

    Verifies that the item exists at the expected on-disk location and that
    item_content_abspath returns exactly that path — checks that matter
    particularly on Windows, where path separator handling differs.
    """

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')
    # Handle with a nested (forward-slash) path component.
    handle = "subdir/tiny.png"

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, handle)

    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    expected_identifier = generate_identifier(handle)
    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1

    # Ensure that the file exists in the disk dataset.
    # Particularly on Windows.
    item_abspath = os.path.join(
        tmp_dir_fixture,
        name,
        "data",
        "subdir",
        "tiny.png"
    )
    assert os.path.isfile(item_abspath)
    assert os.path.isfile(dataset.item_content_abspath(expected_identifier))

    # Ensure that the correct abspath is returned.
    # Particularly on Windows.
    assert dataset.item_content_abspath(expected_identifier) == item_abspath  # NOQA
def main(dataset_uri, config_path=None):
    """Add a "useful_name" overlay derived from each item's relpath."""
    dataset = DataSet.from_uri(dataset_uri, config_path=config_path)

    def name_from_identifier(identifier):
        # The useful name is the item's relpath minus its file extension.
        relpath = dataset.item_properties(identifier)['relpath']
        return relpath.rsplit('.', 1)[0]

    useful_name_overlay = {
        identifier: name_from_identifier(identifier)
        for identifier in dataset.identifiers
    }

    dataset.put_overlay("useful_name", useful_name_overlay)
Example #38
0
def test_copy_and_diff(tmp_uuid_and_uri):  # NOQA
    """dtoolcore.copy produces a local dataset identical to the remote one.

    Builds a minimal frozen dataset at the fixture URI, copies it into a
    temporary local directory, and checks that identifiers, sizes and
    content all match.
    """

    uuid, dest_uri = tmp_uuid_and_uri

    import dtoolcore
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.compare import (
        diff_identifiers,
        diff_sizes,
        diff_content,
    )

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri, admin_metadata=admin_metadata)
    proto_dataset.create()
    proto_dataset.put_readme(content='---\ndescription: test')
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    remote_dataset = DataSet.from_uri(dest_uri)

    with tmp_directory() as local_dir:
        # Copy to local disk; the returned URI should be a file URI.
        local_uri = dtoolcore.copy(dest_uri, local_dir)
        assert local_uri.startswith("file:/")
        local_dataset = DataSet.from_uri(local_uri)
        assert len(diff_identifiers(local_dataset, remote_dataset)) == 0
        assert len(diff_sizes(local_dataset, remote_dataset)) == 0
        assert len(diff_content(local_dataset, remote_dataset)) == 0