def __init__(self,
             connection,
             s3,
             *args,
             redshift_schema='public',
             logging_level=None,
             default_column_length=DEFAULT_COLUMN_LENGTH,
             persist_empty_tables=False,
             **kwargs):
    self.LOGGER.info(
        'RedshiftTarget created with established connection: `{}`, schema: `{}`'.format(
            connection.dsn, redshift_schema))

    self.s3 = s3
    self.default_column_length = default_column_length
    PostgresTarget.__init__(self,
                            connection,
                            postgres_schema=redshift_schema,
                            logging_level=logging_level,
                            persist_empty_tables=persist_empty_tables,
                            add_upsert_indexes=False)
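# Usage sketch (not from the source): constructing the target with a psycopg2
# connection and an S3 helper. The endpoint, credentials, and `s3` object below
# are hypothetical placeholders; the real S3 wrapper is whatever this project's
# s3 module produces.
import psycopg2

connection = psycopg2.connect(host='example-cluster.example.com',  # hypothetical Redshift endpoint
                              port=5439,
                              dbname='analytics',
                              user='loader',
                              password='...')
target = RedshiftTarget(connection,
                        s3,  # assumed: an S3 config/client prepared by the caller
                        redshift_schema='public')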
def write_batch(self, stream_buffer):
    # WARNING: Using mutability here as there's no simple way to copy the necessary data over
    nullable_stream_buffer = stream_buffer
    nullable_stream_buffer.schema = _make_schema_nullable(stream_buffer.schema)
    return PostgresTarget.write_batch(self, nullable_stream_buffer)
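# `_make_schema_nullable` is defined elsewhere in the module; a minimal sketch of
# the behavior implied above, assuming it relaxes every property to also accept
# 'null'. The name and the nullable-schema usage are the only givens here.
from copy import deepcopy

def _make_schema_nullable_sketch(schema):
    nullable = deepcopy(schema)
    for prop in nullable.get('properties', {}).values():
        types = prop.get('type', [])
        if isinstance(types, str):
            types = [types]
        if 'null' not in types:
            types = types + ['null']
        prop['type'] = types
    return nullable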
def main(config, input_stream=None):
    with psycopg2.connect(
            connection_factory=MillisLoggingConnection,
            host=config.get('postgres_host', 'localhost'),
            port=config.get('postgres_port', 5432),
            dbname=config.get('postgres_database'),
            user=config.get('postgres_username'),
            password=config.get('postgres_password'),
            sslmode=config.get('postgres_sslmode'),
            sslcert=config.get('postgres_sslcert'),
            sslkey=config.get('postgres_sslkey'),
            sslrootcert=config.get('postgres_sslrootcert'),
            sslcrl=config.get('postgres_sslcrl'),
            application_name=config.get('application_name', 'target-postgres'),
    ) as connection:
        postgres_target = PostgresTarget(
            connection,
            postgres_schema=config.get('postgres_schema', 'public'),
            logging_level=config.get('logging_level'),
            persist_empty_tables=config.get('persist_empty_tables'),
            add_upsert_indexes=config.get('add_upsert_indexes', True),
            before_run_sql=config.get('before_run_sql'),
            after_run_sql=config.get('after_run_sql'),
        )

        if input_stream:
            target_tools.stream_to_target(input_stream, postgres_target, config=config)
        else:
            target_tools.main(postgres_target)
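# Hypothetical driver for the `main` above: the keys mirror the `config.get` calls,
# and the values are placeholders, not a real deployment. With no `input_stream`,
# target_tools.main presumably consumes Singer messages from stdin.
if __name__ == '__main__':
    example_config = {
        'postgres_host': 'localhost',
        'postgres_port': 5432,
        'postgres_database': 'warehouse',
        'postgres_username': 'target',
        'postgres_password': 'secret',
        'postgres_schema': 'public',
        'postgres_sslmode': 'prefer',
        'persist_empty_tables': False,
    }
    main(example_config)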
def sql_type_to_json_schema(self, sql_type, is_nullable):
    # Redshift reports varchar columns as 'character varying'; map them back to a
    # JSON Schema string, preserving nullability.
    if sql_type == 'character varying':
        schema = {'type': [json_schema.STRING]}
        if is_nullable:
            return json_schema.make_nullable(schema)
        return schema

    return PostgresTarget.sql_type_to_json_schema(self, sql_type, is_nullable)
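# Round-trip illustration for the override above (illustrative, not exhaustive):
#
#   sql_type_to_json_schema('character varying', False) -> {'type': ['string']}
#   sql_type_to_json_schema('character varying', True)  -> the same schema, made nullable
#   anything else falls through to PostgresTarget's mapping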
def write_batch(self, stream_buffer):
    # WARNING: Using mutability here as there's no simple way to copy the necessary data over
    self.LOGGER.info('write_batch: Schema before nullability: {}'.format(stream_buffer.schema))
    nullable_stream_buffer = stream_buffer
    nullable_stream_buffer.schema = _make_schema_nullable(stream_buffer.schema)
    self.LOGGER.info('write_batch: Schema after nullability: {}'.format(stream_buffer.schema))
    return PostgresTarget.write_batch(self, nullable_stream_buffer)
def upsert_table_helper(self, connection, table_schema, metadata, log_schema_changes=True):
    self.LOGGER.info('upsert_table_helper: Schema before nullability: {}'.format(table_schema))
    nullable_table_schema = deepcopy(table_schema)
    nullable_table_schema['schema'] = _make_schema_nullable(nullable_table_schema['schema'])
    self.LOGGER.info('upsert_table_helper: Schema after nullability: {}'.format(nullable_table_schema))
    return PostgresTarget.upsert_table_helper(self,
                                              connection,
                                              nullable_table_schema,
                                              metadata,
                                              log_schema_changes=log_schema_changes)
def json_schema_to_sql_type(self, schema):
    psql_type = PostgresTarget.json_schema_to_sql_type(self, schema)

    max_length = schema.get('maxLength', self.default_column_length)
    if max_length > self.MAX_VARCHAR:
        max_length = self.MAX_VARCHAR

    if psql_type.upper() == 'TEXT':
        return 'varchar({})'.format(max_length)
    elif psql_type.upper() == 'TEXT NOT NULL':
        return 'varchar({}) NOT NULL'.format(max_length)

    return psql_type
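# Worked examples of the mapping above, assuming the parent class resolves plain
# string schemas to TEXT (values shown are illustrative):
#
#   {'type': ['string'], 'maxLength': 100}   -> 'varchar(100)'
#   {'type': ['string']}                     -> 'varchar(default_column_length)'
#   {'type': ['string'], 'maxLength': 10**9} -> 'varchar(MAX_VARCHAR)'  # capped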
def upsert_table_helper(self, connection, table_schema, metadata, log_schema_changes=True):
    nullable_table_schema = deepcopy(table_schema)
    nullable_table_schema['schema'] = _make_schema_nullable(nullable_table_schema['schema'])
    return PostgresTarget.upsert_table_helper(self,
                                              connection,
                                              nullable_table_schema,
                                              metadata,
                                              log_schema_changes=log_schema_changes)
def main(config, input_stream=None):
    streams = {}
    try:
        if not config.get('disable_collection', False):
            LOGGER.info('Sending version information to singer.io. ' +
                        'To disable sending anonymous usage data, set ' +
                        'the config parameter "disable_collection" to true')
            threading.Thread(target=send_usage_stats).start()

        connection = psycopg2.connect(host=config.get('postgres_host', 'localhost'),
                                      port=config.get('postgres_port', 5432),
                                      dbname=config.get('postgres_database'),
                                      user=config.get('postgres_username'),
                                      password=config.get('postgres_password'))

        postgres_target = PostgresTarget(connection,
                                         LOGGER,
                                         postgres_schema=config.get('postgres_schema', 'public'))

        invalid_records_detect = config.get('invalid_records_detect')
        invalid_records_threshold = config.get('invalid_records_threshold')
        max_batch_rows = config.get('max_batch_rows')
        max_batch_size = config.get('max_batch_size')
        batch_detection_threshold = config.get('batch_detection_threshold', 5000)

        if not input_stream:
            input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')

        line_count = 0
        for line in input_stream:
            line_handler(streams,
                         postgres_target,
                         invalid_records_detect,
                         invalid_records_threshold,
                         max_batch_rows,
                         max_batch_size,
                         line)
            if line_count > 0 and line_count % batch_detection_threshold == 0:
                flush_streams(streams, postgres_target)
            line_count += 1

        flush_streams(streams, postgres_target, force=True)

        connection.close()
    except Exception as e:
        LOGGER.critical(e)
        raise e
    finally:
        report_invalid_records(streams)
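# Test-style sketch (not from the source): `input_stream` lets callers bypass stdin.
# The SCHEMA/RECORD messages are illustrative Singer lines, and `example_config`
# must hold working Postgres credentials for this to run.
import io

messages = io.StringIO(
    '{"type": "SCHEMA", "stream": "users", "schema": {"type": "object", '
    '"properties": {"id": {"type": "integer"}}}, "key_properties": ["id"]}\n'
    '{"type": "RECORD", "stream": "users", "record": {"id": 1}}\n')
main(example_config, input_stream=messages)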
def json_schema_to_sql_type(self, schema):
    psql_type = PostgresTarget.json_schema_to_sql_type(self, schema)

    # The original format string had one placeholder but two arguments; report both.
    assert 'NOT NULL' not in psql_type, \
        'Redshift does not support `NOT NULL` without a default. Got: {} for schema: {}'.format(
            psql_type, schema)

    max_length = schema.get('maxLength', self.default_column_length)
    if max_length > self.MAX_VARCHAR:
        max_length = self.MAX_VARCHAR

    if psql_type.upper() == 'TEXT':
        return 'varchar({})'.format(max_length)

    return psql_type
def main(config, input_stream=None):
    with psycopg2.connect(host=config.get('postgres_host', 'localhost'),
                          port=config.get('postgres_port', 5432),
                          dbname=config.get('postgres_database'),
                          user=config.get('postgres_username'),
                          password=config.get('postgres_password')) as connection:
        postgres_target = PostgresTarget(connection,
                                         postgres_schema=config.get('postgres_schema', 'public'))

        if input_stream:
            target_tools.stream_to_target(input_stream, postgres_target, config=config)
        else:
            target_tools.main(postgres_target)
def main(config, input_stream=None):
    with psycopg2.connect(
            connection_factory=MillisLoggingConnection,
            host=config.get('postgres_host', 'localhost'),
            port=config.get('postgres_port', 5432),
            dbname=config.get('postgres_database'),
            user=config.get('postgres_username'),
            password=config.get('postgres_password')) as connection:
        postgres_target = PostgresTarget(
            connection,
            postgres_schema=config.get('postgres_schema', 'public'),
            logging_level=config.get('logging_level'),
            persist_empty_tables=config.get('persist_empty_tables'))

        if input_stream:
            target_tools.stream_to_target(input_stream, postgres_target, config=config)
        else:
            target_tools.main(postgres_target)
def add_column(self, cur, table_name, column_name, column_schema):
    self.LOGGER.info('add_column({}, {}, {})'.format(
        table_name,
        column_name,
        column_schema))

    PostgresTarget.add_column(self, cur, table_name, column_name, column_schema)