def test_create_dataset_from_hdfs_uri(self, context, test_org,
                                          test_data_urls):
        """
        <b>Description:</b>
        Verify that a dataset can be created when an hdfs uri is passed as its source.

        <b>Input data:</b>
        1. organization id
        2. hdfs URI

        <b>Expected results:</b>
        Test passes when the dataset is created and its source_uri property holds the hdfs uri.

        <b>Steps:</b>
        1. Create a dataset from a url.
        2. Take that dataset's target_uri (hdfs path) and create a second dataset from it.
        3. Check that the second dataset's source_uri equals the first dataset's target_uri.
        """
        step("Create source dataset")
        org_guid = test_org.guid
        _, origin = data_catalog.create_dataset_from_link(
            context, org_guid, test_data_urls.test_transfer.url)

        step("Create dataset from hdfs uri")
        # The first dataset's target uri is the source of the second dataset.
        hdfs_uri = origin.target_uri
        _, derived = data_catalog.create_dataset_from_link(
            context, org_guid, hdfs_uri)
        assert derived.source_uri == hdfs_uri
Esempio n. 2
0
 def test_create_dataset_from_hdfs_uri(self, context, test_org):
     """Create a dataset from a link, then a second one from the first's target uri.

     Passes when the second dataset's source_uri equals the first dataset's
     target_uri.
     """
     step("Create source dataset")
     _, origin = data_catalog.create_dataset_from_link(
         context, test_org, Urls.test_transfer_link)

     step("Create dataset from hdfs uri")
     # The first dataset's target uri is the source of the second dataset.
     hdfs_uri = origin.target_uri
     _, derived = data_catalog.create_dataset_from_link(
         context, test_org, hdfs_uri)
     assert derived.source_uri == hdfs_uri
    def test_1_create_transfer_and_dataset_from_hadoop_mapreduce_examples(
            self, class_context, test_org, add_admin_to_test_org,
            test_data_urls):
        """
        <b>Description:</b>
        Create a transfer and a dataset from the hadoop mapreduce examples.

        <b>Input data:</b>
        1. organization id
        2. Url with hadoop mapreduce examples

        <b>Expected results:</b>
        Test passes when the transfer and dataset are created from the hadoop mapreduce examples
        and the hdfs path is stored on the test class.

        <b>Steps:</b>
        1. Create transfer and dataset from hadoop mapreduce examples.
        2. Derive the hdfs source file path from the dataset target uri.
        """
        step("Create dataset from hadoop mapreduce examples jar")
        examples_url = test_data_urls.hadoop_mapreduce_examples.url
        _, jar_dataset = create_dataset_from_link(
            context=class_context,
            org_guid=test_org.guid,
            source=examples_url)

        step("Create jar path")
        # Remove the nameservice prefix from the target uri; the result is
        # kept on the class (not the instance) as jar_path.
        nameservice_prefix = "hdfs://nameservice1"
        jar_path = jar_dataset.target_uri.replace(nameservice_prefix, "")
        self.__class__.jar_path = jar_path
Esempio n. 4
0
 def dataset(cls, request, test_org, add_admin_to_test_org, class_context,
             test_data_urls):
     """Create a dataset from the test transfer url and return it.

     `request` and `add_admin_to_test_org` are accepted but not used in the
     body (presumably requested for their fixture side effects).
     """
     source_url = test_data_urls.test_transfer.url
     _, created = data_catalog.create_dataset_from_link(
         class_context, org_guid=test_org.guid, source=source_url)
     return created
Esempio n. 5
0
    def test_create_transfer_from_atk_model_file(self, context, test_org,
                                                 ref_space, atk_virtualenv,
                                                 initial_dataset):
        """Create a dataset from the model file written by the atk create-model script.

        Steps:
        1. Find the application named "atk" in the reference space.
        2. Install the atk client in the virtualenv and run atk_create_model.py
           with the initial dataset's target uri.
        3. Read the model path from the script output and create a dataset from it.

        Passes when the created dataset's source_uri equals the model path.
        Raises AssertionError when the atk app is missing or when the script
        output does not contain the "hdfs_model_path: " marker.
        """
        step("Get atk app from core space")
        atk_app = next(
            (app
             for app in Application.cf_api_get_list_by_space(ref_space.guid)
             if app.name == "atk"), None)
        if atk_app is None:
            raise AssertionError("Atk app not found in core space")
        # The first url of the app is used both for the client download and
        # for running the script.
        atk_url = atk_app.urls[0]

        step("Install atk client package in virtualenv")
        atk_virtualenv.create()
        atk_virtualenv.pip_install(ATKtools.get_atk_client_url(atk_url))

        step("Run atk create model script")
        atk_test_script_path = os.path.join("fixtures", "atk_test_scripts",
                                            "atk_create_model.py")
        response = atk_virtualenv.run_atk_script(
            atk_test_script_path,
            atk_url,
            arguments={"--target_uri": initial_dataset.target_uri})

        step("Retrieve path to model file created by atk")
        marker = "hdfs_model_path: "
        # partition() yields the same tail as split(marker, 1)[1] when the
        # marker is present, and lets us fail with a readable message (rather
        # than a bare IndexError) when it is not.
        _, found, hdfs_model_path = response.partition(marker)
        if not found:
            raise AssertionError(
                "'{}' not found in atk script output: {!r}".format(
                    marker, response))

        step("Create dataset by providing retrieved model file path")
        _, ds = data_catalog.create_dataset_from_link(context,
                                                      org=test_org,
                                                      source=hdfs_model_path)
        assert ds.source_uri == hdfs_model_path
Esempio n. 6
0
 def dataset(self, test_org, context, test_data_urls):
     """Create a dataset from the test transfer url and return it."""
     step("Create data set")
     source_url = test_data_urls.test_transfer.url
     _, created = data_catalog.create_dataset_from_link(
         context, org_guid=test_org.guid, source=source_url)
     return created
    def test_1_check_dataset_from_HDFS(self, test_org):
        """Create datasets from the HDFS config files and from the first output file.

        Passes when the config-file datasets satisfy
        assert_datasets_not_empty and the output-file dataset, stored on the
        class as TEST_DATASET, has a size greater than zero.
        """
        # One link per config file: directory prefix + file name.
        config_links = [self.HDFS_CONFIG_DIR + file_name
                        for file_name in self.HDFS_CONFIG_FILES]
        config_datasets = data_catalog.create_datasets_from_links(
            self.context, test_org.guid, config_links)
        assertions.assert_datasets_not_empty(config_datasets)

        output_link = self.HDFS_OUTPUT_DIR + self.HDFS_OUTPUT_FILES[0]
        _, output_dataset = data_catalog.create_dataset_from_link(
            self.context, test_org.guid, output_link)
        # Stored on the class rather than the instance.
        self.__class__.TEST_DATASET = output_dataset
        assert self.TEST_DATASET.size > 0
Esempio n. 8
0
def model_hdfs_path(request, test_org, add_admin_to_test_org):
    """Create a dataset from the model url and return its target uri.

    A fresh Context is created for the dataset and its cleanup() is
    registered on `request` as a finalizer. `add_admin_to_test_org` is
    accepted but not used in the body.

    Returns:
        The target_uri of the created dataset.
    """
    log_fixture("Create a transfer and get hdfs path")
    context = Context()
    # Register cleanup before creating anything, so the finalizer still runs
    # if create_dataset_from_link raises part-way through.
    request.addfinalizer(context.cleanup)
    _, data_set = data_catalog.create_dataset_from_link(context,
                                                        org=test_org,
                                                        source=Urls.model_url)
    return data_set.target_uri
 def test_2_create_dataset(self, class_context, core_hdfs_instance):
     """Create a dataset from a url and check its directory in hdfs.

     The dataset is created with the org manager client and stored on the
     class. The test then lists the directories under the userspace path
     (formatted with the org guid and the hdfs instance guid) and asserts
     that one named after the dataset's object_store_id exists with the org
     manager as owner and the org as group.
     """
     step("Create dataset from url")
     _, created = create_dataset_from_link(
         class_context,
         self.test_org,
         Urls.test_transfer_link,
         client=self.test_org_manager_client)
     # Stored on the class rather than the instance.
     self.__class__.dataset = created

     step("Check dataset directory in hdfs")
     userspace_path = self.USERSPACE_PATH.format(self.test_org.guid,
                                                 core_hdfs_instance.guid)
     hdfs_dirs = self._list_hdfs_directories(userspace_path)
     self._assert_directory_in_hdfs(
         hdfs_dirs,
         name=self.dataset.object_store_id,
         owner=self.test_org_manager.guid,
         group=self.test_org.guid)
    def test_0_create_transfer_and_dataset_with_csv_link(
            self, class_context, test_org, test_data_urls):
        """
        <b>Description:</b>
        Create a transfer and a dataset from a url, then publish the created dataset in hue.

        <b>Input data:</b>
        1. test organization

        <b>Expected results:</b>
        Test passes when the transfer and dataset are created and the dataset is published in hue.

        <b>Steps:</b>
        1. Create transfer and dataset.
        2. Publish the created dataset in hue.
        """
        step("Create new transfer and dataset")
        source_url = test_data_urls.test_transfer.url
        created_transfer, created_dataset = create_dataset_from_link(
            context=class_context,
            org_guid=test_org.guid,
            source=source_url)
        # The transfer is stored on the class rather than the instance.
        self.__class__.transfer = created_transfer

        step("Publish dataset in HUE")
        created_dataset.api_publish()
Esempio n. 11
0
 def dataset(self, test_org, add_admin_to_test_org, context):
     """Create a dataset from the test transfer link and return it.

     `add_admin_to_test_org` is accepted but not used in the body.
     """
     step("Create data set")
     _, created = data_catalog.create_dataset_from_link(
         context,
         org=test_org,
         source=Urls.test_transfer_link)
     return created
def assert_dataset_greater_with_retry(value_to_compare, *args, **kwargs):
    """Create a dataset and assert that its size exceeds `value_to_compare`.

    All remaining positional and keyword arguments are forwarded unchanged
    to data_catalog.create_dataset_from_link. No retry logic is in this
    body; NOTE(review): the retry is presumably applied by a decorator that
    is not visible here - confirm.
    """
    created = data_catalog.create_dataset_from_link(*args, **kwargs)
    _, new_dataset = created
    assert new_dataset.size > value_to_compare
Esempio n. 13
0
 def dataset_target_uri(self, test_org, class_context, add_admin_to_test_org, test_data_urls):
     """Create a dataset from the test transfer url and return its target_uri.

     `add_admin_to_test_org` is accepted but not used in the body.
     """
     source_url = test_data_urls.test_transfer.url
     _, created = create_dataset_from_link(
         class_context, test_org.guid, source_url)
     return created.target_uri
Esempio n. 14
0
 def dataset(cls, request, test_org, add_admin_to_test_org, class_context):
     """Create a dataset from the test transfer link and return it.

     `request` and `add_admin_to_test_org` are accepted but not used in the
     body.
     """
     _, created = data_catalog.create_dataset_from_link(
         class_context,
         org=test_org,
         source=Urls.test_transfer_link)
     return created