Exemple #1
0
    def test_executes_with_empty_source_objects(self, mock_hook):
        """The source bucket is listed exactly once, with an empty prefix and no delimiter."""
        gcs_hook = mock_hook.return_value
        task = GCSToGCSOperator(
            task_id=TASK_ID,
            source_bucket=TEST_BUCKET,
            source_objects=SOURCE_OBJECTS_NO_FILE,
        )

        task.execute(None)

        gcs_hook.list.assert_called_once_with(TEST_BUCKET, prefix='', delimiter=None)
Exemple #2
0
    def test_raises_exception_with_two_empty_list_inside_source_objects(self, mock_hook):
        """Executing with SOURCE_OBJECTS_TWO_EMPTY_STRING must raise an AirflowException."""
        expected_error = "You can't have two empty strings inside source_object"
        gcs_hook = mock_hook.return_value
        gcs_hook.list.return_value = SOURCE_OBJECTS_LIST

        task = GCSToGCSOperator(
            task_id=TASK_ID,
            source_bucket=TEST_BUCKET,
            source_objects=SOURCE_OBJECTS_TWO_EMPTY_STRING,
        )

        with pytest.raises(AirflowException, match=expected_error):
            task.execute(None)
Exemple #3
0
    def test_execute_no_suffix(self, mock_hook):
        """A trailing-wildcard source object is listed by its prefix with an empty delimiter."""
        gcs_hook = mock_hook.return_value
        task = GCSToGCSOperator(
            task_id=TASK_ID,
            source_bucket=TEST_BUCKET,
            source_object=SOURCE_OBJECT_WILDCARD_SUFFIX,
            destination_bucket=DESTINATION_BUCKET,
        )

        task.execute(None)

        gcs_hook.list.assert_called_once_with(TEST_BUCKET, prefix="test_object", delimiter="")
Exemple #4
0
    def test_no_prefix_with_last_modified_time_with_false_cond(self, mock_hook):
        """Nothing is rewritten when the hook's is_updated_after check returns False."""
        gcs_hook = mock_hook.return_value
        gcs_hook.is_updated_after.return_value = False

        task = GCSToGCSOperator(
            task_id=TASK_ID,
            source_bucket=TEST_BUCKET,
            source_object=SOURCE_OBJECT_NO_WILDCARD,
            destination_bucket=DESTINATION_BUCKET,
            destination_object=SOURCE_OBJECT_NO_WILDCARD,
            last_modified_time=MOD_TIME_1,
        )
        task.execute(None)

        gcs_hook.rewrite.assert_not_called()
Exemple #5
0
    def test_execute_no_prefix_with_no_last_modified_time(self, mock_hook):
        """With last_modified_time=None the single object is rewritten exactly once."""
        gcs_hook = mock_hook.return_value
        expected_args = (TEST_BUCKET, 'test_object.txt', DESTINATION_BUCKET, 'test_object.txt')

        task = GCSToGCSOperator(
            task_id=TASK_ID,
            source_bucket=TEST_BUCKET,
            source_object=SOURCE_OBJECT_NO_WILDCARD,
            destination_bucket=DESTINATION_BUCKET,
            destination_object=SOURCE_OBJECT_NO_WILDCARD,
            last_modified_time=None,
        )
        task.execute(None)

        gcs_hook.rewrite.assert_called_once_with(*expected_args)
Exemple #6
0
 def test_executes_with_multiple_items_in_source_objects(self, mock_hook):
     """Each expected file prefix is listed, in any order, with no delimiter."""
     gcs_hook = mock_hook.return_value
     expected_listings = [
         mock.call(TEST_BUCKET, prefix=object_name, delimiter=None)
         for object_name in ('test_object/file1.txt', 'test_object/file2.txt')
     ]

     task = GCSToGCSOperator(
         task_id=TASK_ID,
         source_bucket=TEST_BUCKET,
         source_objects=SOURCE_OBJECTS_MULTIPLE_FILES,
     )
     task.execute(None)

     gcs_hook.list.assert_has_calls(expected_listings, any_order=True)
Exemple #7
0
 def test_executes_with_no_destination_bucket_and_no_destination_object(self, mock_hook):
     """Without any destination arguments, objects are rewritten onto the source bucket and name."""
     gcs_hook = mock_hook.return_value
     gcs_hook.list.return_value = SOURCE_OBJECTS_LIST

     task = GCSToGCSOperator(
         task_id=TASK_ID,
         source_bucket=TEST_BUCKET,
         source_objects=SOURCE_OBJECTS_LIST,
     )
     task.execute(None)

     # Source and destination are expected to be identical for every object.
     object_names = (
         'test_object/file1.txt',
         'test_object/file2.txt',
         'test_object/file3.json',
     )
     expected_rewrites = [
         mock.call(TEST_BUCKET, object_name, TEST_BUCKET, object_name)
         for object_name in object_names
     ]
     gcs_hook.rewrite.assert_has_calls(expected_rewrites)
Exemple #8
0
    def test_no_prefix_with_maximum_modified_time_with_true_cond(self, mock_hook):
        """The object is rewritten once when the hook's is_updated_before check returns True."""
        gcs_hook = mock_hook.return_value
        gcs_hook.is_updated_before.return_value = True
        expected_args = (TEST_BUCKET, 'test_object.txt', DESTINATION_BUCKET, 'test_object.txt')

        task = GCSToGCSOperator(
            task_id=TASK_ID,
            source_bucket=TEST_BUCKET,
            source_object=SOURCE_OBJECT_NO_WILDCARD,
            destination_bucket=DESTINATION_BUCKET,
            destination_object=SOURCE_OBJECT_NO_WILDCARD,
            maximum_modified_time=MOD_TIME_1,
        )
        task.execute(None)

        gcs_hook.rewrite.assert_called_once_with(*expected_args)
Exemple #9
0
    def test_wc_with_last_modified_time_with_one_true_cond(self, mock_hook):
        """Only the object whose is_updated_after check returns True is rewritten."""
        gcs_hook = mock_hook.return_value
        gcs_hook.list.return_value = SOURCE_FILES_LIST
        # Successive is_updated_after calls answer True, then False, then False.
        gcs_hook.is_updated_after.side_effect = [True, False, False]

        task = GCSToGCSOperator(
            task_id=TASK_ID,
            source_bucket=TEST_BUCKET,
            source_object=SOURCE_OBJECT_WILDCARD_FILENAME,
            destination_bucket=DESTINATION_BUCKET,
            last_modified_time=MOD_TIME_1,
        )
        task.execute(None)

        copied_object = 'test_object/file1.txt'
        gcs_hook.rewrite.assert_called_once_with(
            TEST_BUCKET, copied_object, DESTINATION_BUCKET, copied_object
        )
Exemple #10
0
    def test_executes_with_is_older_than_with_true_cond(self, mock_hook):
        """The object is rewritten once when the hook's is_older_than check returns True."""
        gcs_hook = mock_hook.return_value
        gcs_hook.is_older_than.return_value = True
        expected_args = (TEST_BUCKET, 'test_object.txt', DESTINATION_BUCKET, 'test_object.txt')

        task = GCSToGCSOperator(
            task_id=TASK_ID,
            source_bucket=TEST_BUCKET,
            source_object=SOURCE_OBJECT_NO_WILDCARD,
            destination_bucket=DESTINATION_BUCKET,
            destination_object=SOURCE_OBJECT_NO_WILDCARD,
            last_modified_time=MOD_TIME_1,
            maximum_modified_time=MOD_TIME_2,
            is_older_than=3600,
        )
        task.execute(None)

        gcs_hook.rewrite.assert_called_once_with(*expected_args)
Exemple #11
0
    def test_execute_wildcard_with_replace_flag_false(self, mock_hook):
        """With replace=False the source bucket and then the destination bucket are listed."""
        gcs_hook = mock_hook.return_value
        task = GCSToGCSOperator(
            task_id=TASK_ID,
            source_bucket=TEST_BUCKET,
            source_object=SOURCE_OBJECT_WILDCARD_SUFFIX,
            destination_bucket=DESTINATION_BUCKET,
            replace=False,
        )

        task.execute(None)

        # Same prefix and delimiter for both buckets; source listing comes first.
        expected_listings = [
            mock.call(bucket, prefix="test_object", delimiter="")
            for bucket in (TEST_BUCKET, DESTINATION_BUCKET)
        ]
        gcs_hook.list.assert_has_calls(expected_listings)
Exemple #12
0
    def test_execute_with_empty_destination_bucket(self, mock_hook):
        """A None destination_bucket logs one warning and ends up equal to source_bucket."""
        gcs_hook = mock_hook.return_value
        gcs_hook.list.return_value = SOURCE_FILES_LIST

        task = GCSToGCSOperator(
            task_id=TASK_ID,
            source_bucket=TEST_BUCKET,
            source_object=SOURCE_OBJECT_NO_WILDCARD,
            destination_bucket=None,
            destination_object=DESTINATION_OBJECT_PREFIX,
        )

        # Intercept the operator's logger so the warning call can be inspected.
        warning_patch = mock.patch.object(task.log, 'warning')
        with warning_patch as warning_spy:
            task.execute(None)
            warning_spy.assert_called_once_with(
                'destination_bucket is None. Defaulting it to source_bucket (%s)', TEST_BUCKET
            )
            self.assertEqual(task.destination_bucket, task.source_bucket)
Exemple #13
0
    def test_execute_more_than_1_wildcard(self, mock_hook):
        """A source object with several wildcards makes execute raise AirflowException."""
        gcs_hook = mock_hook.return_value
        gcs_hook.list.return_value = SOURCE_FILES_LIST

        task = GCSToGCSOperator(
            task_id=TASK_ID,
            source_bucket=TEST_BUCKET,
            source_object=SOURCE_OBJECT_MULTIPLE_WILDCARDS,
            destination_bucket=DESTINATION_BUCKET,
            destination_object=DESTINATION_OBJECT_PREFIX,
        )

        wildcard_count = task.source_object.count(WILDCARD)
        # The message is used as a regex, hence the character class around '*'.
        expected_pattern = (
            "Only one wildcard '[*]' is allowed in source_object parameter. "
            "Found {}".format(wildcard_count)
        )

        with self.assertRaisesRegex(AirflowException, expected_pattern):
            task.execute(None)
Exemple #14
0
    def test_executes_with_delimiter_and_destination_object(self, mock_hook):
        """The single listed object is rewritten to DESTINATION_OBJECT in the destination bucket."""
        listed_object = 'test_object/file3.json'
        gcs_hook = mock_hook.return_value
        gcs_hook.list.return_value = [listed_object]

        task = GCSToGCSOperator(
            task_id=TASK_ID,
            source_bucket=TEST_BUCKET,
            source_objects=SOURCE_OBJECTS_LIST,
            destination_bucket=DESTINATION_BUCKET,
            destination_object=DESTINATION_OBJECT,
            delimiter=DELIMITER,
        )
        task.execute(None)

        expected_rewrite = mock.call(
            TEST_BUCKET, listed_object, DESTINATION_BUCKET, DESTINATION_OBJECT
        )
        gcs_hook.rewrite.assert_has_calls([expected_rewrite])
Exemple #15
0
    def test_execute_wildcard_without_destination_object(self, mock_hook):
        """Without destination_object, matched files keep their names in the destination bucket."""
        gcs_hook = mock_hook.return_value
        gcs_hook.list.return_value = SOURCE_FILES_LIST

        task = GCSToGCSOperator(
            task_id=TASK_ID,
            source_bucket=TEST_BUCKET,
            source_object=SOURCE_OBJECT_WILDCARD_FILENAME,
            destination_bucket=DESTINATION_BUCKET,
        )
        task.execute(None)

        expected_rewrites = [
            mock.call(TEST_BUCKET, object_name, DESTINATION_BUCKET, object_name)
            for object_name in ('test_object/file1.txt', 'test_object/file2.txt')
        ]
        gcs_hook.rewrite.assert_has_calls(expected_rewrites)
Exemple #16
0
    def test_wc_with_last_modified_time_with_all_true_cond_no_file(self, mock_hook):
        """All three expected rewrites happen when every is_updated_after check returns True."""
        gcs_hook = mock_hook.return_value
        gcs_hook.list.return_value = SOURCE_OBJECTS_LIST
        gcs_hook.is_updated_after.side_effect = [True, True, True]

        task = GCSToGCSOperator(
            task_id=TASK_ID,
            source_bucket=TEST_BUCKET,
            source_objects=SOURCE_OBJECTS_NO_FILE,
            destination_bucket=DESTINATION_BUCKET,
            last_modified_time=MOD_TIME_1,
        )
        task.execute(None)

        object_names = (
            'test_object/file1.txt',
            'test_object/file2.txt',
            'test_object/file3.json',
        )
        expected_rewrites = [
            mock.call(TEST_BUCKET, object_name, DESTINATION_BUCKET, object_name)
            for object_name in object_names
        ]
        gcs_hook.rewrite.assert_has_calls(expected_rewrites)
Exemple #17
0
    def test_execute_wildcard_with_destination_object_retained_prefix(self, mock_hook):
        """Matched files are rewritten under the destination prefix with the source prefix kept."""
        gcs_hook = mock_hook.return_value
        gcs_hook.list.return_value = SOURCE_FILES_LIST

        # Destination is the prefix joined with the wildcard source minus its last character.
        retained_destination = '{}/{}'.format(
            DESTINATION_OBJECT_PREFIX, SOURCE_OBJECT_WILDCARD_SUFFIX[:-1]
        )
        task = GCSToGCSOperator(
            task_id=TASK_ID,
            source_bucket=TEST_BUCKET,
            source_object=SOURCE_OBJECT_WILDCARD_FILENAME,
            destination_bucket=DESTINATION_BUCKET,
            destination_object=retained_destination,
        )
        task.execute(None)

        source_to_destination = [
            ('test_object/file1.txt', 'foo/bar/test_object/file1.txt'),
            ('test_object/file2.txt', 'foo/bar/test_object/file2.txt'),
        ]
        expected_rewrites = [
            mock.call(TEST_BUCKET, source_name, DESTINATION_BUCKET, destination_name)
            for source_name, destination_name in source_to_destination
        ]
        gcs_hook.rewrite.assert_has_calls(expected_rewrites)