Example no. 1
0
 def cast_property_type(self, property_name: str, column_name: str,
                        jinja_column: str) -> str:
     """Return the SQL expression casting this property's column to its declared type.

     Arrays and objects are delegated to dedicated handlers, booleans go
     through a jinja conversion macro, and the remaining scalar types are
     mapped to dbt type macros and wrapped in a plain ``cast``. A property
     whose type is missing or unrecognized is passed through unchanged
     after printing a warning.
     """
     prop = self.properties[property_name]
     if "type" not in prop:
         print(
             f"WARN: Unknown type for column {property_name} at {self.current_json_path()}"
         )
         return column_name
     declared = prop["type"]
     if is_array(declared):
         return self.cast_property_type_as_array(property_name, column_name)
     if is_object(declared):
         sql_type = self.cast_property_type_as_object(
             property_name, column_name)
     # Simple types are tested from narrower to wider scope:
     # boolean < integer < number < string
     elif is_boolean(declared):
         # booleans use a dedicated macro rather than a plain cast
         to_boolean = jinja_call(f"cast_to_boolean({jinja_column})")
         return f"{to_boolean} as {column_name}"
     elif is_integer(declared):
         sql_type = jinja_call("dbt_utils.type_bigint()")
     elif is_number(declared):
         sql_type = jinja_call("dbt_utils.type_float()")
     elif is_string(declared):
         sql_type = jinja_call("dbt_utils.type_string()")
     else:
         print(
             f"WARN: Unknown type {declared} for column {property_name} at {self.current_json_path()}"
         )
         return column_name
     return f"cast({column_name} as {sql_type}) as {column_name}"
Example no. 2
0
 def get_primary_key_from_path(self, column_names: Dict[str, Tuple[str,
                                                                   str]],
                               path: List[str]) -> str:
     """Return the SQL expression used as primary-key component for ``path``.

     Only single-field paths are supported. Airbyte-generated columns and
     "complex" property types (number/boolean/array/object) are cast to
     string, because some destinations cannot use those types as primary
     keys; other scalar fields are referenced as-is.

     Raises:
         ValueError: if ``path`` is empty or contains more than one field.
     """
     if not path:
         raise ValueError(
             f"No path specified for stream {self.stream_name}")
     if len(path) > 1:
         raise ValueError(
             f"Unsupported nested path {'.'.join(path)} for stream {self.stream_name}"
         )
     field = path[0]
     if is_airbyte_column(field):
         # using an airbyte generated column
         return f"cast({field} as {jinja_call('dbt_utils.type_string()')})"
     # properties without a 'type' entry are treated as objects
     property_type = self.properties[field].get("type", "object")
     if (is_number(property_type) or is_boolean(property_type)
             or is_array(property_type) or is_object(property_type)):
         # some destinations don't handle float columns (or other types) as primary keys, turn everything to string
         return f"cast({self.safe_cast_to_string(field, self.properties[field], column_names[field][1])} as {jinja_call('dbt_utils.type_string()')})"
     return field
Example no. 3
0
 def safe_cast_to_string(property_name: str, definition: Dict,
                         column_name: str) -> str:
     """Wrap ``column_name`` in a conversion helper so it is safe to use as a string.

     Booleans and arrays require dedicated conversion macros before being
     used in string contexts; any other (or unknown) type is returned
     untouched. ``property_name`` is accepted for interface parity but is
     not used here.
     """
     if "type" not in definition:
         # no declared type: nothing to convert
         return column_name
     declared = definition["type"]
     if is_boolean(declared):
         return f"boolean_to_string({column_name})"
     if is_array(declared):
         return f"array_to_string({column_name})"
     return column_name
Example no. 4
0
 def extract_json_column(property_name: str, json_column_name: str, definition: Dict, column_name: str) -> str:
     """Build the jinja expression pulling this property out of the raw JSON column.

     The extraction macro is chosen from the declared type: arrays use
     ``json_extract_array``, simple scalar types use ``json_extract_scalar``,
     and objects — or properties without usable type information — use the
     plain ``json_extract`` macro.
     """
     json_path = [property_name]
     # default extractor when no type information is available
     extract_expr = jinja_call(f"json_extract({json_column_name}, {json_path})")
     if "type" in definition:
         declared = definition["type"]
         if is_array(declared):
             extract_expr = jinja_call(f"json_extract_array({json_column_name}, {json_path})")
         elif is_object(declared):
             extract_expr = jinja_call(f"json_extract({json_column_name}, {json_path})")
         elif is_simple_property(declared):
             extract_expr = jinja_call(f"json_extract_scalar({json_column_name}, {json_path})")
     return f"{extract_expr} as {column_name}"
Example no. 5
0
 def safe_cast_to_string(definition: Dict, column_name: str) -> str:
     """Make ``column_name`` safe to use in string contexts.

     Booleans and arrays are wrapped in their dedicated conversion macros;
     everything else is returned unchanged. The result should always be
     used within a jinja context (for example, from a jinja macro
     surrogate_key call).
     """
     if "type" not in definition:
         # no declared type: leave the column untouched
         return column_name
     declared = definition["type"]
     if is_boolean(declared):
         return f"boolean_to_string({column_name})"
     if is_array(declared):
         return f"array_to_string({column_name})"
     return column_name
Example no. 6
0
 def find_children_streams(
         self, from_table: str,
         column_names: Dict[str, Tuple[str,
                                       str]]) -> List["StreamProcessor"]:
     """
     For each complex-type property (object, or array with item properties), generate
     a new child StreamProcessor that produces a separate child pipeline.
     The current stream/table is used as the parent from which to extract data.

     :param from_table: name of the (parent) table the child pipelines read from
     :param column_names: mapping of property name -> (name, quoted name) pair
     :return: one StreamProcessor per discovered group of child properties
     """
     properties = self.properties
     children: List[StreamProcessor] = []
     for field in properties.keys():
         children_properties = None
         if is_airbyte_column(field):
             # airbyte bookkeeping columns never spawn child streams
             pass
         elif is_combining_node(properties[field]):
             # TODO: merge properties of all combinations
             pass
         elif "type" not in properties[field] or is_object(
                 properties[field]["type"]):
             # properties without 'type' field are treated like properties with 'type' = 'object'
             children_properties = find_properties_object([], field,
                                                          properties[field])
             is_nested_array = False
             # json_column_name = f"'{field}'"
             json_column_name = column_names[field][1]
         elif is_array(properties[field]
                       ["type"]) and "items" in properties[field]:
             # array with an 'items' schema: child rows come from unnesting each element
             quoted_field = column_names[field][1]
             children_properties = find_properties_object(
                 [], field, properties[field]["items"])
             is_nested_array = True
             json_column_name = f"unnested_column_value({quoted_field})"
         if children_properties:
             # NOTE: is_nested_array / json_column_name are only bound by the same
             # branches that set children_properties, so they are defined here.
             for child_key in children_properties:
                 stream_processor = StreamProcessor.create_from_parent(
                     parent=self,
                     child_name=field,
                     json_column_name=json_column_name,
                     properties=children_properties[child_key],
                     is_nested_array=is_nested_array,
                     from_table=from_table,
                 )
                 children.append(stream_processor)
     return children