Ejemplo n.º 1
0
    def __init__(
            self,
            connection,
            s3,
            *args,
            redshift_schema='public',
            logging_level=None,
            default_column_length=DEFAULT_COLUMN_LENGTH,
            persist_empty_tables=False,
            **kwargs):
        """Set up the target on top of an already-established connection.

        Args:
            connection: Open database connection; its ``dsn`` attribute is
                logged and the object is handed to ``PostgresTarget``.
            s3: Stored as ``self.s3`` for later use by this target.
            *args: Accepted but not used or forwarded by this method.
            redshift_schema: Schema name, forwarded to ``PostgresTarget``
                as ``postgres_schema``.
            logging_level: Forwarded unchanged to ``PostgresTarget``.
            default_column_length: Stored as ``self.default_column_length``.
            persist_empty_tables: Forwarded unchanged to ``PostgresTarget``.
            **kwargs: Accepted but not used or forwarded by this method.
        """
        creation_message = 'RedshiftTarget created with established connection: `{}`, schema: `{}`'.format(
            connection.dsn, redshift_schema)
        self.LOGGER.info(creation_message)

        self.s3 = s3
        self.default_column_length = default_column_length

        # The base initialiser is always called with upsert indexes disabled.
        PostgresTarget.__init__(
            self,
            connection,
            postgres_schema=redshift_schema,
            logging_level=logging_level,
            persist_empty_tables=persist_empty_tables,
            add_upsert_indexes=False)
Ejemplo n.º 2
0
    def write_batch(self, stream_buffer):
        """Rewrite the buffer's schema via ``_make_schema_nullable`` and write it.

        The ``schema`` attribute of the caller's ``stream_buffer`` is replaced
        in place; the same object is then passed to
        ``PostgresTarget.write_batch`` and that call's result is returned.
        """
        # WARNING: the buffer is mutated on purpose, as there's no simple way
        # to copy the necessary data over to a fresh buffer.
        stream_buffer.schema = _make_schema_nullable(stream_buffer.schema)
        return PostgresTarget.write_batch(self, stream_buffer)
Ejemplo n.º 3
0
def main(config, input_stream=None):
    """Connect to Postgres as described by `config` and run a `PostgresTarget`.

    Args:
        config: Mapping of settings. Connection keys read here are
            ``postgres_host`` (default ``'localhost'``), ``postgres_port``
            (default ``5432``), ``postgres_database``, ``postgres_username``,
            ``postgres_password``, the ``postgres_ssl*`` options and
            ``application_name`` (default ``'target-postgres'``). Target keys
            are ``postgres_schema`` (default ``'public'``), ``logging_level``,
            ``persist_empty_tables``, ``add_upsert_indexes`` (default ``True``),
            ``before_run_sql`` and ``after_run_sql``.
        input_stream: Optional stream of input lines. When truthy it is fed
            to ``target_tools.stream_to_target``; otherwise
            ``target_tools.main`` is called with the target.
    """
    connection = psycopg2.connect(
        connection_factory=MillisLoggingConnection,
        host=config.get('postgres_host', 'localhost'),
        port=config.get('postgres_port', 5432),
        dbname=config.get('postgres_database'),
        user=config.get('postgres_username'),
        password=config.get('postgres_password'),
        sslmode=config.get('postgres_sslmode'),
        sslcert=config.get('postgres_sslcert'),
        sslkey=config.get('postgres_sslkey'),
        sslrootcert=config.get('postgres_sslrootcert'),
        sslcrl=config.get('postgres_sslcrl'),
        application_name=config.get('application_name', 'target-postgres'),
    )
    try:
        # psycopg2's `with connection` only scopes the transaction (commit on
        # success, rollback on error); it does not close the connection, so
        # the close is done explicitly in `finally`.
        with connection:
            postgres_target = PostgresTarget(
                connection,
                postgres_schema=config.get('postgres_schema', 'public'),
                logging_level=config.get('logging_level'),
                persist_empty_tables=config.get('persist_empty_tables'),
                add_upsert_indexes=config.get('add_upsert_indexes', True),
                before_run_sql=config.get('before_run_sql'),
                after_run_sql=config.get('after_run_sql'),
            )

            if input_stream:
                target_tools.stream_to_target(input_stream,
                                              postgres_target,
                                              config=config)
            else:
                target_tools.main(postgres_target)
    finally:
        connection.close()
Ejemplo n.º 4
0
    def sql_type_to_json_schema(self, sql_type, is_nullable):
        """Translate a SQL column type into a JSON schema fragment.

        ``'character varying'`` is handled here and yields a schema whose
        ``type`` list holds ``json_schema.STRING``, passed through
        ``json_schema.make_nullable`` when `is_nullable` is truthy. Any other
        `sql_type` is delegated to ``PostgresTarget.sql_type_to_json_schema``.
        """
        if sql_type != 'character varying':
            return PostgresTarget.sql_type_to_json_schema(self, sql_type, is_nullable)

        varchar_schema = {'type': [json_schema.STRING]}
        if not is_nullable:
            return varchar_schema
        return json_schema.make_nullable(varchar_schema)
Ejemplo n.º 5
0
    def write_batch(self, stream_buffer):
        """Rewrite the buffer's schema via ``_make_schema_nullable``, logging both states.

        The schema is logged before and after the rewrite. The ``schema``
        attribute of the caller's ``stream_buffer`` is replaced in place, and
        the same object is then passed to ``PostgresTarget.write_batch``,
        whose result is returned.
        """
        before_template = 'write_batch: Schema before nullability: {}'
        after_template = 'write_batch: Schema after nullability: {}'

        self.LOGGER.info(before_template.format(stream_buffer.schema))

        # WARNING: the buffer is mutated on purpose, as there's no simple way
        # to copy the necessary data over to a fresh buffer.
        stream_buffer.schema = _make_schema_nullable(stream_buffer.schema)

        self.LOGGER.info(after_template.format(stream_buffer.schema))

        return PostgresTarget.write_batch(self, stream_buffer)
Ejemplo n.º 6
0
    def upsert_table_helper(self, connection, table_schema, metadata, log_schema_changes=True):
        """Upsert a table using a copy of `table_schema` rewritten by ``_make_schema_nullable``.

        `table_schema` is deep-copied so the caller's object is left
        untouched; the copy's ``'schema'`` entry is replaced and the copy is
        forwarded to ``PostgresTarget.upsert_table_helper``. The schema is
        logged before and after the rewrite.
        """
        self.LOGGER.info(
            'upsert_table_helper: Schema before nullability: {}'.format(table_schema))

        schema_copy = deepcopy(table_schema)
        schema_copy['schema'] = _make_schema_nullable(schema_copy['schema'])

        self.LOGGER.info(
            'upsert_table_helper: Schema after nullability: {}'.format(schema_copy))

        return PostgresTarget.upsert_table_helper(
            self,
            connection,
            schema_copy,
            metadata,
            log_schema_changes=log_schema_changes)
Ejemplo n.º 7
0
    def json_schema_to_sql_type(self, schema):
        """Map a JSON schema to a SQL type, replacing ``text`` with a sized ``varchar``.

        The type comes from ``PostgresTarget.json_schema_to_sql_type``. When
        it is ``TEXT`` or ``TEXT NOT NULL`` (compared case-insensitively) it
        is replaced by ``varchar(n)``, keeping the ``NOT NULL`` suffix, where
        ``n`` is the schema's ``maxLength`` (falling back to
        ``self.default_column_length``) capped at ``self.MAX_VARCHAR``. Any
        other type is returned unchanged.
        """
        base_type = PostgresTarget.json_schema_to_sql_type(self, schema)

        # Cap the requested length at MAX_VARCHAR.
        length = min(schema.get('maxLength', self.default_column_length),
                     self.MAX_VARCHAR)

        normalized = base_type.upper()
        if normalized == 'TEXT':
            return 'varchar({})'.format(length)
        if normalized == 'TEXT NOT NULL':
            return 'varchar({}) NOT NULL'.format(length)
        return base_type
Ejemplo n.º 8
0
 def upsert_table_helper(self,
                         connection,
                         table_schema,
                         metadata,
                         log_schema_changes=True):
     """Upsert a table using a copy of `table_schema` rewritten by ``_make_schema_nullable``.

     `table_schema` is deep-copied so the caller's object is left untouched;
     the copy's ``'schema'`` entry is replaced and the copy is forwarded to
     ``PostgresTarget.upsert_table_helper``, whose result is returned.
     """
     schema_copy = deepcopy(table_schema)
     original_inner_schema = schema_copy['schema']
     schema_copy['schema'] = _make_schema_nullable(original_inner_schema)

     return PostgresTarget.upsert_table_helper(
         self, connection, schema_copy, metadata,
         log_schema_changes=log_schema_changes)
Ejemplo n.º 9
0
def main(config, input_stream=None):
    """Read input lines and load them through a `PostgresTarget`.

    Unless ``disable_collection`` is set in `config`, a background thread
    running ``send_usage_stats`` is started first. A psycopg2 connection is
    then opened from the ``postgres_*`` settings, every input line is passed
    to ``line_handler``, and ``flush_streams`` is called each time
    ``batch_detection_threshold`` lines (default ``5000``) have been
    processed, plus once with ``force=True`` after the last line.

    Args:
        config: Mapping of settings. Keys read here: ``disable_collection``,
            ``postgres_host`` (default ``'localhost'``), ``postgres_port``
            (default ``5432``), ``postgres_database``, ``postgres_username``,
            ``postgres_password``, ``postgres_schema`` (default ``'public'``),
            ``invalid_records_detect``, ``invalid_records_threshold``,
            ``max_batch_rows``, ``max_batch_size`` and
            ``batch_detection_threshold``.
        input_stream: Optional iterable of lines. When falsy, stdin is read
            as UTF-8 text.

    Raises:
        Exception: Any error from the run is logged at critical level and
            re-raised. ``report_invalid_records`` runs regardless.
    """
    streams = {}
    # Stays None if connecting fails, so the cleanup below knows to skip it.
    connection = None
    try:
        if not config.get('disable_collection', False):
            LOGGER.info('Sending version information to singer.io. ' +
                        'To disable sending anonymous usage data, set ' +
                        'the config parameter "disable_collection" to true')
            threading.Thread(target=send_usage_stats).start()

        connection = psycopg2.connect(host=config.get('postgres_host',
                                                      'localhost'),
                                      port=config.get('postgres_port', 5432),
                                      dbname=config.get('postgres_database'),
                                      user=config.get('postgres_username'),
                                      password=config.get('postgres_password'))

        postgres_target = PostgresTarget(connection,
                                         LOGGER,
                                         postgres_schema=config.get(
                                             'postgres_schema', 'public'))

        invalid_records_detect = config.get('invalid_records_detect')
        invalid_records_threshold = config.get('invalid_records_threshold')
        max_batch_rows = config.get('max_batch_rows')
        max_batch_size = config.get('max_batch_size')
        batch_detection_threshold = config.get('batch_detection_threshold',
                                               5000)

        if not input_stream:
            input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')

        for line_count, line in enumerate(input_stream):
            line_handler(streams, postgres_target, invalid_records_detect,
                         invalid_records_threshold, max_batch_rows,
                         max_batch_size, line)
            # line_count is the zero-based index of the line just handled;
            # index 0 never triggers an intermediate flush.
            if line_count > 0 and line_count % batch_detection_threshold == 0:
                flush_streams(streams, postgres_target)

        flush_streams(streams, postgres_target, force=True)
    except Exception as e:
        LOGGER.critical(e)
        raise
    finally:
        # Close the connection on both the success and the failure path, and
        # still report invalid records even if closing itself fails.
        try:
            if connection is not None:
                connection.close()
        finally:
            report_invalid_records(streams)
Ejemplo n.º 10
0
    def json_schema_to_sql_type(self, schema):
        """Map a JSON schema to a SQL type, replacing ``text`` with a sized ``varchar``.

        The type comes from ``PostgresTarget.json_schema_to_sql_type``. When
        it is ``TEXT`` (compared case-insensitively) it is replaced by
        ``varchar(n)``, where ``n`` is the schema's ``maxLength`` (falling
        back to ``self.default_column_length``) capped at
        ``self.MAX_VARCHAR``. Any other type is returned unchanged.

        Raises:
            AssertionError: If the base type contains ``NOT NULL``. The
                message reports both the offending type and the schema.
        """
        psql_type = PostgresTarget.json_schema_to_sql_type(self, schema)

        # The message previously had one placeholder for two arguments, so
        # the schema was silently dropped; both are reported now.
        assert 'NOT NULL' not in psql_type, (
            'Redshift does not support `NOT NULL` without a default. '
            'Got: {} for schema: {}'.format(psql_type, schema))

        max_length = schema.get('maxLength', self.default_column_length)
        if max_length > self.MAX_VARCHAR:
            max_length = self.MAX_VARCHAR

        if psql_type.upper() == 'TEXT':
            return 'varchar({})'.format(max_length)

        return psql_type
Ejemplo n.º 11
0
def main(config, input_stream=None):
    """Connect to Postgres as described by `config` and run a `PostgresTarget`.

    Args:
        config: Mapping of settings. Keys read here: ``postgres_host``
            (default ``'localhost'``), ``postgres_port`` (default ``5432``),
            ``postgres_database``, ``postgres_username``,
            ``postgres_password`` and ``postgres_schema`` (default
            ``'public'``).
        input_stream: Optional stream of input lines. When truthy it is fed
            to ``target_tools.stream_to_target``; otherwise
            ``target_tools.main`` is called with the target.
    """
    connection = psycopg2.connect(
        host=config.get('postgres_host', 'localhost'),
        port=config.get('postgres_port', 5432),
        dbname=config.get('postgres_database'),
        user=config.get('postgres_username'),
        password=config.get('postgres_password'))
    try:
        # psycopg2's `with connection` only scopes the transaction (commit on
        # success, rollback on error); it does not close the connection, so
        # the close is done explicitly in `finally`.
        with connection:
            postgres_target = PostgresTarget(connection,
                                             postgres_schema=config.get(
                                                 'postgres_schema', 'public'))

            if input_stream:
                target_tools.stream_to_target(input_stream,
                                              postgres_target,
                                              config=config)
            else:
                target_tools.main(postgres_target)
    finally:
        connection.close()
Ejemplo n.º 12
0
def main(config, input_stream=None):
    """Connect to Postgres as described by `config` and run a `PostgresTarget`.

    The connection is created with ``MillisLoggingConnection`` as its
    connection factory.

    Args:
        config: Mapping of settings. Keys read here: ``postgres_host``
            (default ``'localhost'``), ``postgres_port`` (default ``5432``),
            ``postgres_database``, ``postgres_username``,
            ``postgres_password``, ``postgres_schema`` (default ``'public'``),
            ``logging_level`` and ``persist_empty_tables``.
        input_stream: Optional stream of input lines. When truthy it is fed
            to ``target_tools.stream_to_target``; otherwise
            ``target_tools.main`` is called with the target.
    """
    connection = psycopg2.connect(
        connection_factory=MillisLoggingConnection,
        host=config.get('postgres_host', 'localhost'),
        port=config.get('postgres_port', 5432),
        dbname=config.get('postgres_database'),
        user=config.get('postgres_username'),
        password=config.get('postgres_password'))
    try:
        # psycopg2's `with connection` only scopes the transaction (commit on
        # success, rollback on error); it does not close the connection, so
        # the close is done explicitly in `finally`.
        with connection:
            postgres_target = PostgresTarget(
                connection,
                postgres_schema=config.get('postgres_schema', 'public'),
                logging_level=config.get('logging_level'),
                persist_empty_tables=config.get('persist_empty_tables'))

            if input_stream:
                target_tools.stream_to_target(input_stream,
                                              postgres_target,
                                              config=config)
            else:
                target_tools.main(postgres_target)
    finally:
        connection.close()
Ejemplo n.º 13
0
 def add_column(self, cur, table_name, column_name, column_schema):
     """Log the requested column addition, then delegate to ``PostgresTarget.add_column``.

     The log line records `table_name`, `column_name` and `column_schema`;
     `cur` is passed through to the base implementation untouched.
     """
     log_line = 'add_column({}, {}, {})'.format(table_name, column_name, column_schema)
     self.LOGGER.info(log_line)
     PostgresTarget.add_column(self, cur, table_name, column_name, column_schema)