def define_inputs(self) -> tp.Dict[str, trac.ModelInputSchema]:
    """Declare the single input table consumed by this model.

    Returns:
        A one-entry mapping from the dataset name ``"customer_loans"`` to
        the schema produced by ``trac.declare_input_table``.
    """
    # Field order here is the order in which the fields are declared.
    loan_fields = [
        trac.F(
            "id", trac.BasicType.STRING,
            label="Customer account ID",
            business_key=True),
        trac.F(
            "loan_amount", trac.BasicType.DECIMAL,
            label="Principal loan amount",
            format_code="CCY:EUR"),
        trac.F(
            "total_pymnt", trac.BasicType.DECIMAL,
            label="Total amount repaid",
            format_code="CCY:EUR"),
        trac.F(
            "region", trac.BasicType.STRING,
            label="Customer home region",
            categorical=True),
        trac.F(
            "loan_condition_cat", trac.BasicType.INTEGER,
            label="Loan condition category",
            categorical=True),
    ]

    loans_schema = trac.declare_input_table(*loan_fields)

    return {"customer_loans": loans_schema}
def define_outputs(self) -> tp.Dict[str, _api.ModelOutputSchema]:
    """Declare the single output table produced by this model.

    Returns:
        A one-entry mapping from the dataset name ``"profit_by_region"``
        to the schema produced by ``_api.declare_output_table``.
    """
    region_field = _api.F(
        "region", _api.BasicType.STRING,
        label="Customer home region",
        categorical=True)

    profit_field = _api.F(
        "gross_profit", _api.BasicType.DECIMAL,
        label="Total gross profit",
        format_code="CCY:USD")

    profit_schema = _api.declare_output_table(region_field, profit_field)

    return {"profit_by_region": profit_schema}
def define_outputs(self) -> tp.Dict[str, trac.ModelOutputSchema]:
    """Declare the single output table produced by this model.

    Returns:
        A one-entry mapping from the dataset name ``"preprocessed_data"``
        to the schema produced by ``trac.declare_output_table``.
    """
    account_id = trac.F(
        "id", trac.BasicType.STRING,
        label="Customer account ID",
        business_key=True)

    quantity_x = trac.F(
        "some_quantity_x", trac.BasicType.DECIMAL,
        label="Some quantity X",
        format_code="CCY:EUR")

    schema = trac.declare_output_table(account_id, quantity_x)

    return {"preprocessed_data": schema}
def define_outputs(self) -> tp.Dict[str, trac.ModelOutputSchema]:
    """Declare the single output table produced by this model.

    Returns:
        A one-entry mapping from the dataset name ``"output1"`` to the
        schema produced by ``trac.declare_output_table``.
    """
    # The third argument to trac.F is passed positionally, exactly as the
    # rest of this definition has always done.
    output_fields = (
        trac.F("field1", trac.BasicType.INTEGER, "Something about this field"),
        trac.F("field2", trac.BasicType.FLOAT, "Something about this other field"),
    )

    schema = trac.declare_output_table(*output_fields)

    return {"output1": schema}
def define_outputs(self) -> tp.Dict[str, api.ModelOutputSchema]:
    """Declare the single output table produced by this model.

    Returns:
        A one-entry mapping from the dataset name ``"output_table_1"`` to
        the schema produced by ``api.declare_output_table``.
    """
    output_fields = [
        api.F(
            "output_field_1", api.STRING,
            label="Output field 1",
            business_key=True),
        api.F(
            "output_field_2", api.DATE,
            label="Output field 2"),
        api.F(
            "output_field_3", api.FLOAT,
            label="Output field 3"),
    ]

    schema = api.declare_output_table(*output_fields)

    return {"output_table_1": schema}
def define_inputs(self) -> tp.Dict[str, api.ModelInputSchema]:
    """Declare the single input table consumed by this model.

    Returns:
        A one-entry mapping from the dataset name ``"input_table_1"`` to
        the schema produced by ``api.declare_input_table``.
    """
    input_fields = [
        api.F(
            "input_field_1", api.STRING,
            label="Input field 1",
            business_key=True),
        api.F(
            "input_field_2", api.INTEGER,
            label="Input field 2"),
        api.F(
            "input_field_3", api.STRING,
            label="Input field 3",
            categorical=True),
        api.F(
            "input_field_4", api.DECIMAL,
            label="Input field 4"),
    ]

    schema = api.declare_input_table(*input_fields)

    return {"input_table_1": schema}
def define_inputs(self) -> tp.Dict[str, trac.ModelInputSchema]:
    """Declare the two input tables consumed by this model.

    Returns:
        A mapping with two entries, ``"customer_loans"`` and
        ``"currency_data"``, each pointing at the schema produced by
        ``trac.declare_input_table`` for that table.
    """
    # --- customer_loans -------------------------------------------------
    loan_fields = [
        trac.F(
            "id", trac.BasicType.STRING,
            label="Customer account ID",
            business_key=True),
        trac.F(
            "loan_amount", trac.BasicType.DECIMAL,
            label="Principal loan amount",
            format_code="CCY:EUR"),
        trac.F(
            "total_pymnt", trac.BasicType.DECIMAL,
            label="Total amount repaid",
            format_code="CCY:EUR"),
        trac.F(
            "region", trac.BasicType.STRING,
            label="Customer home region",
            categorical=True),
        trac.F(
            "loan_condition_cat", trac.BasicType.INTEGER,
            label="Loan condition category",
            categorical=True),
    ]
    loans_schema = trac.declare_input_table(*loan_fields)

    # --- currency_data --------------------------------------------------
    fx_fields = [
        trac.F(
            "ccy_code", trac.BasicType.STRING,
            label="Currency code",
            categorical=True),
        trac.F(
            "spot_date", trac.BasicType.DATE,
            label="Spot date for FX rate"),
        trac.F(
            "dollar_rate", trac.BasicType.DECIMAL,
            label="Dollar FX rate",
            format_code="CCY:USD"),
    ]
    fx_schema = trac.declare_input_table(*fx_fields)

    return {
        "customer_loans": loans_schema,
        "currency_data": fx_schema,
    }
class DataRoundTripModel(trac.TracModel):
    """Model that passes its input table straight through to its output.

    The input and output tables share one schema, ``ROUND_TRIP_FIELDS``,
    which has one field for each of the BOOLEAN, INTEGER, FLOAT, DECIMAL,
    STRING, DATE and DATETIME basic types. The ``use_spark`` parameter
    selects whether the table is read and written through the Spark or
    the pandas accessors on the context.
    """

    # Shared by define_inputs and define_outputs so both sides of the
    # round trip are declared with the same fields.
    ROUND_TRIP_FIELDS = [
        trac.F("boolean_field", trac.BasicType.BOOLEAN, label="BOOLEAN field"),
        trac.F("integer_field", trac.BasicType.INTEGER, label="INTEGER field"),
        trac.F("float_field", trac.BasicType.FLOAT, label="FLOAT field"),
        trac.F("decimal_field", trac.BasicType.DECIMAL, label="DECIMAL field"),
        trac.F("string_field", trac.BasicType.STRING, label="STRING field"),
        trac.F("date_field", trac.BasicType.DATE, label="DATE field"),
        trac.F("datetime_field", trac.BasicType.DATETIME, label="DATETIME field")]

    def define_parameters(self) -> tp.Dict[str, trac.ModelParameter]:
        """Declare the ``use_spark`` boolean parameter (default ``False``)."""

        return trac.declare_parameters(
            trac.P("use_spark", trac.BasicType.BOOLEAN, default_value=False,
                   label="Use Spark for round trip testing"))

    def define_inputs(self) -> tp.Dict[str, trac.ModelInputSchema]:
        """Declare the ``round_trip_input`` table using ``ROUND_TRIP_FIELDS``."""

        round_trip_input = trac.declare_input_table(self.ROUND_TRIP_FIELDS)

        return {"round_trip_input": round_trip_input}

    def define_outputs(self) -> tp.Dict[str, trac.ModelOutputSchema]:
        """Declare the ``round_trip_output`` table using ``ROUND_TRIP_FIELDS``."""

        # Outputs must be declared with declare_output_table so the returned
        # value matches the annotated ModelOutputSchema; this previously
        # called declare_input_table by mistake.
        round_trip_output = trac.declare_output_table(self.ROUND_TRIP_FIELDS)

        return {"round_trip_output": round_trip_output}

    def run_model(self, ctx: trac.TracContext):
        """Copy ``round_trip_input`` to ``round_trip_output`` unchanged.

        Args:
            ctx: Context used to read the ``use_spark`` parameter, fetch the
                input table and store the output table.
        """

        use_spark = ctx.get_parameter("use_spark")

        if use_spark:
            round_trip_input = ctx.get_spark_table("round_trip_input")
            ctx.put_spark_table("round_trip_output", round_trip_input)

        else:
            round_trip_input = ctx.get_pandas_table("round_trip_input")
            ctx.put_pandas_table("round_trip_output", round_trip_input)