def test_op_type_check_with_correct_type2():
    """
    With input type TYPE_TEXT, check if input_type and output_type are correct.
    """
    meta = TM({
        "tables": [
            {"fields": [{'name': 'col', 'type': TM.TYPE_TEXT}]}
        ]})
    op = FakeOp('col')
    meta2 = op.op_type_check(meta)
    assert meta2 == meta
    assert meta.get_type('col') == TM.TYPE_BOOL
    assert op.input_type == TM.TYPE_TEXT and op.output_type == TM.TYPE_BOOL

def test_op_type_check_with_wrong_type():
    """
    With input type TYPE_IDENTIFIER, check if None is returned by op_type_check.
    """
    meta = TM({
        "tables": [
            {"fields": [{'name': 'col', 'type': TM.TYPE_IDENTIFIER}]}
        ]})
    op = FakeOp('col')
    meta2 = op.op_type_check(meta)
    assert meta2 is None
    assert meta.get_type('col') == TM.TYPE_IDENTIFIER
    assert op.input_type is TM.TYPE_IDENTIFIER and op.output_type is None

def test_labeler_apply():
    """
    Apply a Labeler to per-entity data using a prediction problem that has
    been written to a JSON file.
    """
    entity_id_column = "taxi_id"
    time_column = "trip_id"
    label_generating_column = "fare"
    filter_column = "fare"
    table_meta = TableMeta.from_json(meta_json_str)
    labeler = Labeler()

    df = dataframe
    entity_to_data_dict = trane.df_group_by_entity_id(df, entity_id_column)
    entity_id_to_data_and_cutoff_dict = trane.ConstantCutoffTime(
        0, 0).generate_cutoffs(entity_to_data_dict, time_column)

    prediction_problem = PredictionProblem([
        AllFilterOp(label_generating_column),
        IdentityRowOp(label_generating_column),
        IdentityTransformationOp(label_generating_column),
        LastAggregationOp(label_generating_column)])

    (is_valid_prediction_problem, filter_column_order_of_types,
     label_generating_column_order_of_types) = \
        prediction_problem.is_valid_prediction_problem(
            table_meta, filter_column, label_generating_column)

    filename = "prediction_problem.json"
    prediction_problems_to_json_file(
        [prediction_problem], table_meta, entity_id_column,
        label_generating_column, time_column, filename)

    labeler.execute(entity_id_to_data_and_cutoff_dict, filename)

    os.remove(filename)

def test_order_of_operations():
    """
    Every generated prediction problem should contain exactly four operations,
    in the order Filter, Row, Transformation, Aggregation.
    """
    table_meta = TableMeta.from_json(meta_json_str)
    entity_id_column = "taxi_id"
    label_generating_column = "fare"
    time_column = "trip_id"
    filter_column = "taxi_id"

    ppg = PredictionProblemGenerator(table_meta, entity_id_column,
                                     label_generating_column, time_column,
                                     filter_column)

    logging.debug(
        "Dataframe in test_prediction_problem_generator.py: \n{}\n".format(
            dataframe))

    generator = ppg.generate(dataframe)
    problems = [prob for prob in generator]

    for problem in problems:
        ops = problem.operations
        assert len(ops) == 4

        first_op = ops[0]
        second_op = ops[1]
        third_op = ops[2]
        fourth_op = ops[3]

        assert issubclass(first_op.__class__, FilterOpBase)
        assert issubclass(second_op.__class__, RowOpBase)
        assert issubclass(third_op.__class__, TransformationOpBase)
        assert issubclass(fourth_op.__class__, AggregationOpBase)

def prep_for_integration(self):
    '''
    Creates a full-fledged prediction problem generator without a mocked-out
    ensure_valid_inputs method.
    '''
    meta_json_str = ' \
        {"path": "", \
         "tables": [ \
            {"path": "synthetic_taxi_data.csv", \
             "name": "taxi_data", \
             "fields": [ \
                {"name": "vendor_id", "type": "id"}, \
                {"name": "taxi_id", "type": "id"}, \
                {"name": "trip_id", "type": "datetime"}, \
                {"name": "distance", "type": "number", "subtype": "float"}, \
                {"name": "duration", "type": "number", "subtype": "float"}, \
                {"name": "fare", "type": "number", "subtype": "float"}, \
                {"name": "num_passengers", "type": "number", \
                 "subtype": "float"} \
            ]}]}'
    self.table_meta = TableMeta.from_json(meta_json_str)

    self.df = pd.DataFrame(
        [(0, 0, 0, 5.32, 19.7, 53.89, 1),
         (0, 0, 1, 1.08, 6.78, 18.89, 2),
         (0, 0, 2, 4.69, 14.11, 41.35, 4)],
        columns=["vendor_id", "taxi_id", "trip_id", "distance",
                 "duration", "fare", "num_passengers"])

    self.generator = PredictionProblemGenerator(
        table_meta=self.table_meta,
        entity_col=self.entity_col,
        label_col=self.label_col,
        filter_col=self.filter_col)

def test_write_then_read():
    """
    Round-trip a prediction problem to a JSON file and back, and check that
    the problem, table metadata, and column names are preserved.
    """
    table_meta = TableMeta.from_json(json_str)
    entity_id_column = "taxi_id"
    time_column = "trip_id"
    label_generating_column = "fare"

    prediction_problem = PredictionProblem([
        AllFilterOp(label_generating_column),
        IdentityRowOp(label_generating_column),
        IdentityTransformationOp(label_generating_column),
        LastAggregationOp(label_generating_column)])

    filename = "prediction_problem.json"
    prediction_problems_to_json_file(
        [prediction_problem], table_meta, entity_id_column,
        label_generating_column, time_column, filename)

    prediction_problems_from_json, table_meta_from_json, \
        entity_id_column_from_json, label_generating_column_from_json, \
        time_column_from_json = prediction_problems_from_json_file(filename)
    prediction_problem_from_json = prediction_problems_from_json[0]

    os.remove(filename)

    assert prediction_problem == prediction_problem_from_json
    assert entity_id_column == entity_id_column_from_json
    assert time_column == time_column_from_json
    assert label_generating_column == label_generating_column_from_json
    assert table_meta == table_meta_from_json

def test_number_of_problems_generated():
    """
    Check the number of prediction problems generated for the taxi data.
    """
    table_meta = TableMeta.from_json(meta_json_str)
    entity_id_column = "taxi_id"
    label_generating_column = "fare"
    time_column = "trip_id"
    filter_column = "taxi_id"

    ppg = PredictionProblemGenerator(table_meta, entity_id_column,
                                     label_generating_column, time_column,
                                     filter_column)
    generator = ppg.generate(dataframe)

    # This number will change if more operations are added, or decrease
    # based on type checking.
    expected = 66
    found = len(list(generator))

    assert expected == found

def test_generated_types():
    """
    Every object yielded by the generator should be a PredictionProblem.
    """
    table_meta = TableMeta.from_json(meta_json_str)
    entity_id_column = "taxi_id"
    label_generating_column = "fare"
    time_column = "trip_id"
    filter_column = "taxi_id"

    ppg = PredictionProblemGenerator(table_meta, entity_id_column,
                                     label_generating_column, time_column,
                                     filter_column)
    generator = ppg.generate(dataframe)

    expected = PredictionProblem
    problems = [prob for prob in generator]
    for problem in problems:
        found = problem
        assert isinstance(found, expected)

def test_op_type_check():
    """
    A prediction problem with correct column types should be valid; one with
    incorrect types should be invalid and return None for its type lists.
    """
    filter_column = "fare"
    label_generating_column = "fare"
    table_meta = TableMeta.from_json(json_str)

    prediction_problem_correct_types = PredictionProblem([
        AllFilterOp(filter_column),
        IdentityRowOp(label_generating_column),
        IdentityTransformationOp(label_generating_column),
        LastAggregationOp(label_generating_column)])

    label_generating_column = "vendor_id"
    prediction_problem_incorrect_types = PredictionProblem([
        AllFilterOp(filter_column),
        IdentityRowOp(label_generating_column),
        IdentityTransformationOp(label_generating_column),
        LMFAggregationOp(label_generating_column)])

    (correct_is_valid, filter_column_types_A,
     label_generating_column_types_A) = \
        prediction_problem_correct_types.is_valid_prediction_problem(
            table_meta, filter_column, "fare")

    (incorrect_is_valid, filter_column_types_B,
     label_generating_column_types_B) = \
        prediction_problem_incorrect_types.is_valid_prediction_problem(
            table_meta, filter_column, label_generating_column)

    assert filter_column_types_A == ['float', 'float']
    assert label_generating_column_types_A == [
        'float', 'float', 'float', 'float']
    assert filter_column_types_B is None
    assert label_generating_column_types_B is None
    assert correct_is_valid
    assert not incorrect_is_valid