Example #1
def from_manifest(manifest, destination, **ingestion_args):
    '''Ingest from a manifest dict that can be JSON-serialized and uploaded to S3.
       For more info on manifests, see
       http://docs.aws.amazon.com/redshift/latest/dg/loading-data-files-using-manifest.html'''
    s3_path = _transient_s3_path(destination) + '.manifest'
    s3_manifest = S3File.from_json_serializable(manifest, s3_path)

    s3_to_redshift(s3_manifest, destination, with_manifest=True, **ingestion_args)
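
A Redshift manifest is just a JSON document that enumerates the S3 objects to load. A minimal, hypothetical usage sketch of from_manifest follows; the bucket, object keys, and destination are placeholders, not real resources:

# Hypothetical usage sketch for from_manifest; every name below is a
# placeholder. The dict mirrors the manifest format from the AWS docs:
# one entry per S3 object, where 'mandatory' makes the COPY fail if
# that object is missing.
manifest = {
    'entries': [
        {'url': 's3://example-bucket/exports/part-0000.csv', 'mandatory': True},
        {'url': 's3://example-bucket/exports/part-0001.csv', 'mandatory': True},
    ]
}
from_manifest(manifest, destination)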
Example #2
    def test_s3_file_upload_and_download_with_path_object(self):
        s3_file = S3File.from_local_file(local_path=self.S3_FILE_UPLOAD_PATH,
                                         s3_path=self.PATH_OBJECT)
        s3_file.download(destination_path=self.S3_FILE_DOWNLOAD_PATH)

        with open(self.S3_FILE_DOWNLOAD_PATH, 'r') as downloaded_file:
            self.assertEqual(downloaded_file.read(), self.S3_FILE_CONTENTS)
Example #3
    def test_s3_file_from_in_memory_data_upload_and_download(self):
        s3_file = S3File.from_in_memory_data(data=self.IN_MEMORY_DATA,
                                             s3_path=self.S3_PATH)
        s3_file.download(destination_path=self.S3_FILE_DOWNLOAD_PATH)

        data_uploaded_to_s3 = self._read_csv_as_list_of_tuples(
            self.S3_FILE_DOWNLOAD_PATH)
        self.assertEqual(data_uploaded_to_s3, self.IN_MEMORY_DATA)
Example #4
    def test_dictionary_becomes_json_file_in_s3(self):
        s3_file = S3File.from_json_serializable(data=self.DATA,
                                                s3_path=self.S3_PATH)

        temp_path = s3_file.download_to_temp()
        with open(temp_path) as json_file:
            actual_data = json.load(json_file)
        self.assertEqual(actual_data, self.DATA)
Example #5
    def test_upsert_audit(self):
        s3_file = S3File.from_local_file(local_path=self.LOCAL_FILE_PATH,
                                         s3_path=self.S3_PATH)

        s3_to_redshift(
            s3_file,
            RedshiftTable(self.DB_CONNECTION, self.TABLE,
                          self.UPSERT_UNIQUENESS_KEY))

        recorded_audit_data = self.DB_CONNECTION.fetch(
            self.AUDIT_TABLE_CONTENTS_QUERY)
        self.assertEqual(recorded_audit_data, self.EXPECTED_AUDIT_DATA)
Example #6
    def test_data_in_redshift(self):
        '''Because our destination database, `BasicRedshiftButActuallyPostgres`, is only
        pretending to be a Redshift database, the s3_to_redshift function should
        successfully move a local .csv file into it.'''

        s3_file = S3File.from_local_file(local_path=self.LOCAL_FILE_PATH,
                                         s3_path=self.S3_PATH)

        s3_to_redshift(
            s3_file,
            RedshiftTable(self.DB_CONNECTION, self.TABLE,
                          self.UPSERT_UNIQUENESS_KEY))

        current_data_in_table = self.DB_CONNECTION.fetch(
            self.DB_SELECT_ALL_QUERY)
        self.assertEqual(current_data_in_table, self.FILE_CONTENTS)
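
The docstring above suggests BasicRedshiftButActuallyPostgres is a test double that satisfies the connection interface these tests rely on (execute and fetch) while talking to a local Postgres instance. A rough sketch of that idea, assuming psycopg2; this is a guess at the shape, not the project's actual class:

import psycopg2


class BasicRedshiftButActuallyPostgres:
    '''Hypothetical sketch: quacks like a Redshift connection but runs
    every statement against a local Postgres instance.'''

    def __init__(self, dsn):
        self._connection = psycopg2.connect(dsn)

    def execute(self, statement):
        # Run a statement that returns no rows (INSERT, UPDATE, VACUUM...).
        with self._connection.cursor() as cursor:
            cursor.execute(statement)
        self._connection.commit()

    def fetch(self, query):
        # Run a query and return all rows, as the assertions above expect.
        with self._connection.cursor() as cursor:
            cursor.execute(query)
            return cursor.fetchall()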
Example #7
    def test_vacuum_errors_are_swallowed(self, database_execute):
        database_execute.side_effect = [
            'pre_upsert_audit_table_insert_statement',
            'post_upsert_audit_table_update_statement',
            NotSupportedError(
                "VACUUM is running. HINT: re-execute after other vacuum finished"
            )
        ]

        s3_file = S3File.from_local_file(local_path=self.LOCAL_FILE_PATH,
                                         s3_path=self.S3_PATH)

        try:
            s3_to_redshift(
                s3_file,
                RedshiftTable(self.DB_CONNECTION, self.TABLE,
                              self.UPSERT_UNIQUENESS_KEY))
        except BaseException:
            self.fail('the vacuum error should have been swallowed, not raised!')
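
The mock's third side effect raises NotSupportedError, and the test asserts that s3_to_redshift does not propagate it. The swallowing presumably looks something like the sketch below; the function name is hypothetical and the real implementation may differ:

from psycopg2 import NotSupportedError


def _vacuum_swallowing_concurrent_vacuum_errors(database_connection, table_name):
    # Hypothetical sketch of the pattern the test above exercises.
    try:
        database_connection.execute('VACUUM {};'.format(table_name))
    except NotSupportedError as error:
        # A concurrent VACUUM is harmless here: the data is already
        # loaded, so skip re-vacuuming instead of failing the ingestion.
        if 'VACUUM is running' not in str(error):
            raise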
Example #8
    def test_s3_file_size(self):
        s3_file = S3File.from_local_file(local_path=self.S3_FILE_UPLOAD_PATH,
                                         s3_path=self.S3_PATH)
        expected_file_size = os.path.getsize(self.S3_FILE_UPLOAD_PATH)
        self.assertEqual(s3_file.file_size, expected_file_size)
Example #9
    def test_s3_file_from_in_memory_data_factory(self):
        s3_file = S3File.from_in_memory_data(data=self.IN_MEMORY_DATA,
                                             s3_path=self.S3_PATH)
        self.assertIsInstance(s3_file, S3File)
Example #10
    def test_s3_file_factory(self):
        s3_file = S3File.from_local_file(local_path=self.S3_FILE_UPLOAD_PATH,
                                         s3_path=self.S3_PATH)
        self.assertIsInstance(s3_file, S3File)
Example #11
    def test_init_with_path_object_sets_key(self):
        s3_file = S3File(s3_path=self.PATH_OBJECT)

        self.assertEqual(s3_file.key_name, self.KEY_NAME)
Example #12
    def test_s3_key_name(self):
        s3_file = S3File(self.S3_PATH)
        self.assertEqual(s3_file.key_name, self.S3_KEY_NAME)
Example #13
def from_local_file(file_path, destination):
    '''Assumes the local file at file_path is a CSV.'''
    s3_path = _transient_s3_path(destination) + '.csv'
    s3_file = S3File.from_local_file(file_path, s3_path)

    from_s3_file(s3_file, destination)
Example #14
def from_s3_path(s3_path, destination):
    '''Assumes the object at s3_path is a CSV.'''
    s3_file = S3File(s3_path)
    from_s3_file(s3_file, destination)
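
Together, these two wrappers give callers a one-line ingestion path whether the CSV starts on local disk or already lives in S3. Hypothetical call sites (the paths and destination are placeholders):

# Hypothetical call sites; the paths and `destination` are placeholders.
from_local_file('/tmp/daily_export.csv', destination)
from_s3_path('s3://example-bucket/exports/daily_export.csv', destination)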
Example #15
    def test_file_size_of_non_existent_file_equals_0(self):
        self.assertEqual(S3File(self.S3_PATH).file_size, 0)
Example #16
    def test_init_with_path_object_sets_bucket(self):
        s3_file = S3File(s3_path=self.PATH_OBJECT)

        self.assertEqual(s3_file.bucket_name, self.S3_BUCKET_NAME)
Example #17
    def test_s3_bucket_name(self):
        s3_file = S3File(self.S3_PATH)
        self.assertEqual(s3_file.bucket_name, self.S3_BUCKET_NAME)
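
The bucket_name and key_name properties tested above only need to split an s3://bucket/key path. A plausible implementation, assuming s3_path stringifies to that form; this is a sketch, not the library's actual code:

from urllib.parse import urlparse


class S3File:
    '''Hypothetical sketch of the path-parsing part of S3File.'''

    def __init__(self, s3_path):
        self.s3_path = s3_path

    @property
    def bucket_name(self):
        # 's3://my-bucket/dir/file.csv' -> 'my-bucket'
        return urlparse(str(self.s3_path)).netloc

    @property
    def key_name(self):
        # 's3://my-bucket/dir/file.csv' -> 'dir/file.csv'
        return urlparse(str(self.s3_path)).path.lstrip('/')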
Example #18
    def __init__(self, file_path, destination, **kwargs):
        local_file_path = S3File(file_path).download_to_temp()
        super().__init__(local_file_path, destination, **kwargs)
        if self.with_manifest:
            raise ValueError(
                "Postgres cannot handle manifests like Redshift does. Sorry.")