def cast_property_type(self, property_name: str, column_name: str, jinja_column: str) -> str:
    """Build a SQL expression casting ``column_name`` to the SQL type implied by
    the JSON schema definition of ``property_name``, aliased back to ``column_name``.

    Arrays, objects and booleans are delegated to dedicated cast helpers; columns
    whose type is missing or unrecognized are passed through unchanged (with a
    warning printed).
    """
    definition = self.properties[property_name]
    if "type" not in definition:
        print(f"WARN: Unknown type for column {property_name} at {self.current_json_path()}")
        return column_name
    property_type = definition["type"]
    if is_array(property_type):
        return self.cast_property_type_as_array(property_name, column_name)
    if is_object(property_type):
        sql_type = self.cast_property_type_as_object(property_name, column_name)
    # Simple types are ordered from narrower to wider scope: boolean < integer < number < string
    elif is_boolean(property_type):
        cast_operation = jinja_call(f"cast_to_boolean({jinja_column})")
        return f"{cast_operation} as {column_name}"
    elif is_integer(property_type):
        sql_type = jinja_call("dbt_utils.type_bigint()")
    elif is_number(property_type):
        sql_type = jinja_call("dbt_utils.type_float()")
    elif is_string(property_type):
        sql_type = jinja_call("dbt_utils.type_string()")
    else:
        print(f"WARN: Unknown type {property_type} for column {property_name} at {self.current_json_path()}")
        return column_name
    return f"cast({column_name} as {sql_type}) as {column_name}"
def get_primary_key_from_path(self, column_names: Dict[str, Tuple[str, str]], path: List[str]) -> str:
    """Return the SQL expression to use as primary key for a single-field ``path``.

    Raises ValueError when the path is empty/None or nested (more than one field).
    Non-scalar or numeric/boolean fields are cast to string because some
    destinations cannot use them directly as primary keys.
    """
    if not path:
        raise ValueError(f"No path specified for stream {self.stream_name}")
    if len(path) != 1:
        raise ValueError(f"Unsupported nested path {'.'.join(path)} for stream {self.stream_name}")
    field = path[0]
    if is_airbyte_column(field):
        # an airbyte generated column: cast it to string directly
        return f"cast({field} as {jinja_call('dbt_utils.type_string()')})"
    # a property without a declared 'type' is treated as an object
    definition = self.properties[field]
    property_type = definition["type"] if "type" in definition else "object"
    if is_number(property_type) or is_boolean(property_type) or is_array(property_type) or is_object(property_type):
        # some destinations don't handle float columns (or other types) as primary keys, turn everything to string
        return f"cast({self.safe_cast_to_string(field, definition, column_names[field][1])} as {jinja_call('dbt_utils.type_string()')})"
    return field
def safe_cast_to_string(property_name: str, definition: Dict, column_name: str) -> str:
    """Wrap ``column_name`` in a to-string conversion macro when its JSON type
    (boolean or array) requires one; otherwise return the column untouched.

    ``property_name`` is kept for interface compatibility with callers even
    though the conversion only depends on the definition.
    """
    if "type" not in definition:
        return column_name
    property_type = definition["type"]
    if is_boolean(property_type):
        return f"boolean_to_string({column_name})"
    if is_array(property_type):
        return f"array_to_string({column_name})"
    return column_name
def extract_json_column(property_name: str, json_column_name: str, definition: Dict, column_name: str) -> str:
    """Build the jinja extraction call pulling ``property_name`` out of the raw
    JSON column, aliased as ``column_name``.

    The extraction macro is chosen from the declared schema type: arrays use
    ``json_extract_array``, simple properties use ``json_extract_scalar``, and
    objects (or properties with no usable type) fall back to ``json_extract``.
    """
    json_path = [property_name]
    # Default to the generic extractor; refine when the schema declares a type.
    macro = "json_extract"
    if "type" in definition:
        if is_array(definition["type"]):
            macro = "json_extract_array"
        elif is_object(definition["type"]):
            macro = "json_extract"
        elif is_simple_property(definition["type"]):
            macro = "json_extract_scalar"
    json_extract = jinja_call(f"{macro}({json_column_name}, {json_path})")
    return f"{json_extract} as {column_name}"
def safe_cast_to_string(definition: Dict, column_name: str) -> str:
    """
    Return ``column_name`` wrapped in a to-string conversion macro when its JSON
    type (boolean or array) needs one, otherwise the column unchanged.

    Note that the result from this static method should always be used within a
    jinja context (for example, from jinja macro surrogate_key call)
    """
    if "type" not in definition:
        return column_name
    property_type = definition["type"]
    if is_boolean(property_type):
        return f"boolean_to_string({column_name})"
    if is_array(property_type):
        return f"array_to_string({column_name})"
    return column_name
def find_children_streams(self, from_table: str, column_names: Dict[str, Tuple[str, str]]) -> List["StreamProcessor"]:
    """
    For each complex type property, generate a new child StreamProcessor that
    produces a separate child pipeline. The current stream/table is used as the
    parent from which to extract data from.
    """
    children: List[StreamProcessor] = []
    for field, definition in self.properties.items():
        # airbyte bookkeeping columns never spawn child streams
        if is_airbyte_column(field):
            continue
        if is_combining_node(definition):
            # TODO: merge properties of all combinations
            continue
        if "type" not in definition or is_object(definition["type"]):
            # properties without 'type' field are treated like properties with 'type' = 'object'
            child_properties = find_properties_object([], field, definition)
            nested_array = False
            json_column_name = column_names[field][1]
        elif is_array(definition["type"]) and "items" in definition:
            quoted_field = column_names[field][1]
            child_properties = find_properties_object([], field, definition["items"])
            nested_array = True
            json_column_name = f"unnested_column_value({quoted_field})"
        else:
            # simple/scalar property: nothing to unnest
            continue
        if not child_properties:
            continue
        for child_key in child_properties:
            children.append(
                StreamProcessor.create_from_parent(
                    parent=self,
                    child_name=field,
                    json_column_name=json_column_name,
                    properties=child_properties[child_key],
                    is_nested_array=nested_array,
                    from_table=from_table,
                )
            )
    return children