Example #1
    def test_add_manifest_copy(self):
        schema = JsonObject(TABLE_NAME, Property('id', 'VARCHAR(36)'))
        bucket = Mock()
        database = create_autospec(Database)
        expected = ManifestCopyFromS3JsonStep(
            metadata='',
            source='',
            schema=schema,
            aws_access_key_id=AWS_ACCESS_KEY_ID,
            aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
            bucket=bucket,
            table=TargetTable(schema, database))

        pipeline = S3CopyPipeline(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY,
                                  bucket, database)
        pipeline.manifest_copy(metadata='', source='', schema=schema)

        step = pipeline.steps()[0]

        self.assertEqual(expected.metadata, step.metadata)
        self.assertEqual(expected.source, step.source)
        self.assertEqual(expected.schema, step.schema)
        self.assertEqual(expected.aws_access_key_id, step.aws_access_key_id)
        self.assertEqual(expected.aws_secret_access_key,
                         step.aws_secret_access_key)
        self.assertEqual(expected.bucket, step.bucket)
        self.assertEqual(expected.table.schema, step.table.schema)
        self.assertEqual(expected.table.database, step.table.database)
Example #2
    def test_add_sql(self):
        bucket = Mock()
        database = create_autospec(Database)
        expected = SqlStep(database,
                           ("INSERT INTO %s VALUES('%s')", TABLE_NAME, 1),
                           ("INSERT INTO %s VALUES('%s')", TABLE_NAME, 2),
                           ("INSERT INTO {0} VALUES ('3')".format(TABLE_NAME)))

        pipeline = S3CopyPipeline(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY,
                                  bucket, database)

        pipeline.sql(("INSERT INTO %s VALUES('%s')", TABLE_NAME, 1),
                     ("INSERT INTO %s VALUES('%s')", TABLE_NAME, 2),
                     ("INSERT INTO {0} VALUES ('3')".format(TABLE_NAME)))

        step = pipeline.steps()[0]

        self.assertEqual(expected.statements, step.statements)
Example #3
```
{
  "id": "66bc8153-d6d9-4351-bada-803330f22db7",
  "someNumber": 1
}
```

schema: Definition of the JSON objects to map into Redshift rows, given as a
`JsonObject` mapper consisting of one or more `Property` declarations.
By default the name of the JSON property is used as the column name, but a
`Property` can also be mapped to a custom column name.
"""

if __name__ == '__main__':
    pipeline = S3CopyPipeline(
        aws_access_key_id=env('AWS_ACCESS_KEY_ID'),
        aws_secret_access_key=env('AWS_SECRET_ACCESS_KEY'),
        bucket=env('BUCKET_NAME'),
        db_connection=psycopg2.connect(env('REDSHIFT_CONNECTION')))

    pipeline.bulk_copy(metadata='path_to_save_pipeline_metadata',
                       source='path_of_source_data',
                       schema=JsonObject(
                           'destination_table_name',
                           Property('id', 'VARCHAR(36)'),
                           Property('someNumber', 'INTEGER',
                                    'custom_column_name')))

    pipeline.manifest_copy(metadata='path_to_save_pipeline_metadata',
                           source='path_of_incremental_source_data',
                           schema=JsonObject(
                               'incremental_destination_table_name',
                               Property('id', 'VARCHAR(36)')))
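
    # Added sketch, not part of the original example: the test examples above
    # show that S3CopyPipeline exposes validate(), which raises
    # PipelineException when no steps have been queued, and steps() for
    # inspecting the queued steps.
    pipeline.validate()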
Example #4
    def test_throw_pipeline_exception_when_no_steps_on_validate(self):
        self.assertRaises(
            PipelineException,
            S3CopyPipeline(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, Mock(),
                           Mock()).validate)