def test_datalake(self):
        """Round-trip files through the datalake channel.

        Writes four dummy files (one of them in a sub-directory), uploads
        the directory recursively to ``DATALAKE_CHANNEL_ID``, and checks that
        every local path is reported as uploaded.  It then downloads today's
        channel files twice -- once with ``'name'`` and once with ``'id'`` as
        the fourth argument of ``download_from_datalake`` -- and checks that
        every uploaded file identifier shows up among the downloaded ones.
        """
        with tempfile.TemporaryDirectory() as dir_name:
            sub_dir_name = os.path.join(dir_name, 'sub/')
            os.makedirs(sub_dir_name)
            paths = {
                os.path.join(dir_name, 'file1.txt'),
                os.path.join(dir_name, 'file2.txt'),
                os.path.join(dir_name, 'file3.txt'),
                os.path.join(sub_dir_name, 'file4.txt'),
            }
            for path in paths:
                with open(path, 'w') as f:
                    f.write('dummy')

            path_iter = generate_upload_file_iter(paths=[dir_name],
                                                  recursive=True)
            upload_success, _ = upload_to_datalake(DATALAKE_CHANNEL_ID,
                                                   path_iter, None)
            # list of uploaded file paths
            upload_sources = [entry[0] for entry in upload_success]
            # list of uploaded file identifiers; this must be a real list
            # (not a lazy map object) because it is iterated once per
            # download below -- a map would be empty the second time and
            # silently skip every assertion.
            upload_destinations = [entry[1] for entry in upload_success]
            self.assertSetEqual(set(upload_sources), paths)

        today = datetime.utcnow().date().strftime("%Y%m%d")

        # NOTE(review): 'name' / 'id' presumably select how downloaded files
        # are named on disk -- confirm against download_from_datalake.
        for naming in ('name', 'id'):
            # Build a fresh iterator for every download in case the returned
            # object is single-pass (e.g. a generator); reusing an exhausted
            # one would make the second download fetch nothing.
            file_iter = generate_channel_file_iter_by_period(
                DATALAKE_CHANNEL_ID, today, today)
            with tempfile.TemporaryDirectory() as download_dir:
                download_success, _ = download_from_datalake(
                    DATALAKE_CHANNEL_ID, file_iter, download_dir, naming,
                    False)
                # list of downloaded file identifiers
                download_sources = [entry[0] for entry in download_success]
                for upload_file_id in upload_destinations:
                    # check if all uploaded files are downloaded
                    self.assertIn(
                        upload_file_id, download_sources,
                        'uploaded file missing from %r download' % naming)
Exemple #2
0
 def test_iter_dir_with_invalid_path(self):
     """Consuming an iterator built from '/invalid' raises InvalidPathException.

     The iterator is created outside the ``assertRaises`` block, so the
     exception is expected when the iterator is consumed, not when it is
     constructed.  The raised exception must expose the offending path via
     its ``path`` attribute.
     """
     invalid_path = '/invalid'
     file_iter = generate_upload_file_iter([invalid_path])
     with self.assertRaises(InvalidPathException) as context:
         list(file_iter)
     # Use a unittest assertion rather than a bare ``assert``: it is not
     # stripped under ``python -O`` and reports both values on failure.
     self.assertEqual(context.exception.path, invalid_path)
Exemple #3
0
 def test_iter_dir_with_reject(self):
     """'/dummy' with ``recursive=False`` is rejected on consumption.

     Consuming the iterator must raise InvalidPathException whose ``path``
     attribute equals the path that was passed in.
     NOTE(review): '/dummy' is presumably a directory provided by the test
     fixture -- confirm against the class setup.
     """
     dir_path = '/dummy'
     file_iter = generate_upload_file_iter([dir_path], recursive=False)
     with self.assertRaises(InvalidPathException) as context:
         list(file_iter)
     # Use a unittest assertion rather than a bare ``assert``: it is not
     # stripped under ``python -O`` and reports both values on failure.
     self.assertEqual(context.exception.path, dir_path)
Exemple #4
0
 def test_iter_file(self):
     """A single file path given to the upload iterator yields only itself."""
     target = '/dummy/file1.txt'
     collected = self.upload_files_to_path_set(
         generate_upload_file_iter([target]))
     # The resulting path set must contain the requested file and nothing else.
     self.assertSetEqual(collected, {target})
Exemple #5
0
 def test_iter_dir_hidden(self):
     """Recursing '/dummy' with ``ignore_hidden_files=False`` gives ALL_FILES."""
     uploads = generate_upload_file_iter(
         ['/dummy'], recursive=True, ignore_hidden_files=False)
     # Convert the iterator to a set of paths and compare with the expectation.
     self.assertSetEqual(self.upload_files_to_path_set(uploads), ALL_FILES)
Exemple #6
0
 def test_iter_dir(self):
     """Recursing '/dummy' with default options gives REGULAR_FILE_SET."""
     found = self.upload_files_to_path_set(
         generate_upload_file_iter(['/dummy'], recursive=True))
     self.assertSetEqual(found, REGULAR_FILE_SET)