Example #1
    def test_safe_column_name_is_null(self):
        """
        Given a null word, we should get null back
        """
        input_name = None

        self.assertIsNone(utils.safe_column_name(input_name))
Example #2
    def test_safe_column_name_case_1(self):
        """
        Given an all lower case word, it should be wrapped in double quotes and upper-cased
        """
        input_name = 'group'

        self.assertEqual('"GROUP"', utils.safe_column_name(input_name))
Example #3
    def test_safe_column_name_case_4(self):
        """
        Given a mixed-case word, it should be wrapped in backticks and upper-cased
        """
        input_name = 'CA se'

        self.assertEqual('`CA SE`', utils.safe_column_name(input_name, '`'))
Example #4
    def test_safe_column_name_case_3(self):
        """
        Given a mixed-case word, it should be wrapped in double quotes and upper-cased
        """
        input_name = 'CA se'

        self.assertEqual('"CA SE"', utils.safe_column_name(input_name))
Example #5
    def test_safe_column_name_case_2(self):
        """
        Given an all lower case word, it should be wrapped in backticks and upper-cased
        """
        input_name = 'group'

        self.assertEqual('`GROUP`', utils.safe_column_name(input_name, '`'))
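
These tests together pin down the expected contract of utils.safe_column_name: None in, None out; otherwise the name is upper-cased and wrapped in the given quote character, defaulting to double quotes. A minimal sketch that would satisfy them is shown below; the signature and parameter name are assumptions, not necessarily the project's actual implementation.

from typing import Optional


def safe_column_name(name: Optional[str], quote_character: str = '"') -> Optional[str]:
    """Upper-case the column name and wrap it in the quote character.

    Returns None for None input, matching test_safe_column_name_is_null.
    """
    if name is None:
        return None
    return f'{quote_character}{name.upper()}{quote_character}'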
Example #6
    def generate_transformations(cls, tap: Dict) -> List[Dict]:
        """
        Generate the transformations data from the given tap config
        Args:
            tap: the tap config dictionary

        Returns: List of transformations
        """
        transformations = []

        for schema in tap.get('schemas', []):
            schema_name = schema.get('source_schema')
            for table in schema.get('tables', []):
                table_name = table.get('table_name')
                for trans in table.get('transformations', []):
                    transformations.append({
                        'tap_stream_name': utils.get_tap_stream_name(
                            tap, tap['db_conn'].get('dbname'), schema_name, table_name),
                        'field_id': trans['column'],
                        # Make the column name safe by wrapping it in quotes; this matters when
                        # the field_id is a reserved word used by target-snowflake in fastsync
                        'safe_field_id': safe_column_name(trans['column']),
                        'field_paths': trans.get('field_paths'),
                        'type': trans['type'],
                        'when': trans.get('when'),
                    })

        return transformations
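
For reference, a hypothetical tap dictionary with the keys generate_transformations reads, and the rough shape of each row it appends; the sample values and the stream-name format are illustrative only, since they depend on utils.get_tap_stream_name and safe_column_name.

sample_tap = {
    'db_conn': {'dbname': 'sales_db'},
    'schemas': [{
        'source_schema': 'public',
        'tables': [{
            'table_name': 'customers',
            'transformations': [
                {'column': 'email', 'type': 'HASH'},
            ],
        }],
    }],
}

# Each appended row would look roughly like this (values are illustrative):
# {
#     'tap_stream_name': ...,      # built by utils.get_tap_stream_name from dbname/schema/table
#     'field_id': 'email',
#     'safe_field_id': '"EMAIL"',  # assuming safe_column_name upper-cases and quotes the name
#     'field_paths': None,
#     'type': 'HASH',
#     'when': None,
# }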
Example #7
    def save_tap_jsons(self, target, tap, extra_config_keys=None):
        """
        Generate JSON config files for a singer tap connector:
            1. config.json             :(Singer spec):  Tap connection details
            2. properties.json         :(Singer spec):  Tap schema properties (generated)
            3. state.json              :(Singer spec):  Bookmark for incremental and log_based
                                                        replications

            4. selection.json          :(Pipelinewise): List of streams/tables to replicate
            5. inheritable_config.json :(Pipelinewise): Extra config keys for the linked
                                                        singer target connector that
                                                        pipelinewise will pass at run time
            6. transformation.json     :(Pipelinewise): Column transformations between the
                                                        tap and target
        """
        if extra_config_keys is None:
            extra_config_keys = {}
        tap_dir = self.get_tap_dir(target.get('id'), tap.get('id'))
        self.logger.info('SAVING TAP JSONS to %s', tap_dir)

        # Define tap JSON file paths
        tap_config_path = os.path.join(tap_dir, 'config.json')
        tap_selection_path = os.path.join(tap_dir, 'selection.json')
        tap_transformation_path = os.path.join(tap_dir, 'transformation.json')
        tap_inheritable_config_path = os.path.join(tap_dir,
                                                   'inheritable_config.json')

        # Create tap dir if not exists
        if not os.path.exists(tap_dir):
            os.mkdir(tap_dir)

        # Generate tap config dict: a merged dictionary of db_connection and optional extra_keys
        tap_config = {**tap.get('db_conn'), **extra_config_keys}

        # Get additional properties that will be needed later to generate tap_stream_id
        tap_dbname = tap_config.get('dbname')

        # Generate tap selection
        selection = []
        for schema in tap.get('schemas', []):
            schema_name = schema.get('source_schema')
            for table in schema.get('tables', []):
                table_name = table.get('table_name')
                replication_method = table.get(
                    'replication_method',
                    utils.get_tap_default_replication_method(tap))
                selection.append(
                    utils.delete_empty_keys({
                        'tap_stream_id': utils.get_tap_stream_id(
                            tap, tap_dbname, schema_name, table_name),
                        'replication_method': replication_method,

                        # Add replication_key only if replication_method is INCREMENTAL
                        'replication_key': table.get('replication_key')
                        if replication_method == 'INCREMENTAL' else None
                    }))
        tap_selection = {'selection': selection}

        # Generate tap transformation
        transformations = []
        for schema in tap.get('schemas', []):
            schema_name = schema.get('source_schema')
            for table in schema.get('tables', []):
                table_name = table.get('table_name')
                for trans in table.get('transformations', []):
                    transformations.append({
                        'tap_stream_name': utils.get_tap_stream_name(
                            tap, tap_dbname, schema_name, table_name),
                        'field_id': trans['column'],
                        # Make the column name safe by wrapping it in quotes; this matters when
                        # the field_id is a reserved word used by target-snowflake in fastsync
                        'safe_field_id': safe_column_name(trans['column']),
                        'type': trans['type'],
                        'when': trans.get('when')
                    })
        tap_transformation = {'transformations': transformations}

        # Generate stream to schema mapping
        schema_mapping = {}
        for schema in tap.get('schemas', []):
            source_schema = schema.get('source_schema')
            target_schema = schema.get('target_schema')
            target_schema_select_perms = schema.get(
                'target_schema_select_permissions', [])

            schema_mapping[source_schema] = {
                'target_schema': target_schema,
                'target_schema_select_permissions': target_schema_select_perms
            }

            # Schema mapping can include a list of indices to create. Some target components,
            # like target-postgres, create the indices automatically
            indices = {}
            for table in schema.get('tables', []):
                table_name = table.get('table_name')
                table_indices = table.get('indices')
                if table_indices:
                    indices[table_name] = table_indices

            # Add indices map to schema mapping
            if indices:
                schema_mapping[source_schema]['indices'] = indices

        # Generate tap inheritable_config dict
        tap_inheritable_config = utils.delete_empty_keys({
            'temp_dir': self.get_temp_dir(),
            'batch_size_rows': tap.get('batch_size_rows', 20000),
            'parallelism': tap.get('parallelism', 0),
            'parallelism_max': tap.get('parallelism_max', 4),
            'hard_delete': tap.get('hard_delete', True),
            'flush_all_streams': tap.get('flush_all_streams', False),
            'primary_key_required': tap.get('primary_key_required', True),
            'default_target_schema': tap.get('default_target_schema'),
            'default_target_schema_select_permissions':
                tap.get('default_target_schema_select_permissions'),
            'schema_mapping': schema_mapping,

            # data_flattening_max_level
            # -------------------------
            #
            # 'data_flattening_max_level' is an optional parameter of some target connectors
            # that specifies how nested objects are loaded into the destination.
            #
            # We can either load the original object as JSON or string (data flattening off)
            # or flatten the schema and data by creating columns automatically. When
            # 'data_flattening_max_level' is set to 0, flattening is turned off.
            #
            # The value can be set in multiple places and is evaluated in the following order:
            #   1: First we try to find it in the tap YAML
            #   2: Second we try to get the tap type specific default value
            #   3: Otherwise we set the flattening level to 0 (disabled)
            'data_flattening_max_level': tap.get(
                'data_flattening_max_level',
                utils.get_tap_property(tap, 'default_data_flattening_max_level') or 0),
            'validate_records': tap.get('validate_records', False),
            'add_metadata_columns': tap.get('add_metadata_columns', False)
        })

        # Save the generated JSON files
        utils.save_json(tap_config, tap_config_path)
        utils.save_json(tap_inheritable_config, tap_inheritable_config_path)
        utils.save_json(tap_transformation, tap_transformation_path)
        utils.save_json(tap_selection, tap_selection_path)
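
To make the generated files concrete, below is a sketch of what selection.json might contain for a tap with one INCREMENTAL and one FULL_TABLE stream. The tap_stream_id values are illustrative (the real format comes from utils.get_tap_stream_id), and utils.delete_empty_keys is assumed to drop keys whose values are None or empty, which is why the FULL_TABLE entry carries no replication_key.

sample_selection = {
    'selection': [
        {
            'tap_stream_id': 'sales_db-public-customers',   # illustrative stream id
            'replication_method': 'INCREMENTAL',
            'replication_key': 'updated_at',
        },
        {
            'tap_stream_id': 'sales_db-public-orders',      # illustrative stream id
            'replication_method': 'FULL_TABLE',
            # replication_key was None here and is assumed to be removed by delete_empty_keys
        },
    ]
}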