def test_slicing_on_batched_should_return_new_batch_frame(self):
    """Slice a two-frame batch that carries outcomes and compare results.

    ``[:]`` must compare equal to the source batch, and ``[:-1]`` must
    compare equal to a batch built from ``create_dataframe()`` with a
    single ``'test'`` outcome entry.
    """
    source = Batch(frames=create_dataframe(2),
                   outcomes={'test': [[None], [None]]})
    all_but_last = Batch(frames=create_dataframe(),
                         outcomes={'test': [[None]]})

    # Full slice: expected to be equal to the batch it came from.
    full_copy = source[:]
    self.assertEqual(source, full_copy)

    # Negative stop: the last frame and its outcome are dropped.
    trimmed = source[:-1]
    self.assertEqual(all_but_last, trimmed)
def test_set_outcomes_method_should_set_temp_outcome_when_bool_is_true(
        self):
    """``set_outcomes`` with ``is_temp=True`` should fill ``temp_outcomes``.

    The mutated batch is compared against one constructed directly with
    ``temp_outcomes={'test': [1]}``.
    """
    actual = Batch(frames=create_dataframe())
    actual.set_outcomes('test', [1], is_temp=True)

    wanted = Batch(frames=create_dataframe(),
                   temp_outcomes={'test': [1]})
    self.assertEqual(wanted, actual)
def test_adding_batch_frame_with_outcomes_returns_new_batch_frame(self):
    """Adding two batches should equal a batch of the combined frames.

    NOTE(review): despite the name, neither operand sets any outcomes
    in this variant; only the frames are exercised.
    """
    left = Batch(frames=create_dataframe())
    right = Batch(frames=create_dataframe())
    combined = Batch(frames=create_dataframe_same(2))

    total = left + right
    self.assertEqual(combined, total)
def test_adding_batch_frame_with_outcomes_returns_new_batch_frame(self):
    """Adding batches should concatenate outcomes and temp outcomes too.

    Each operand carries one entry under outcome key ``'1'`` and temp
    outcome key ``'2'``; the sum is expected to hold both entries, in
    operand order, under the same keys.
    """
    left = Batch(frames=create_dataframe(),
                 outcomes={'1': [1]},
                 temp_outcomes={'2': [1]})
    right = Batch(frames=create_dataframe(),
                  outcomes={'1': [2]},
                  temp_outcomes={'2': [2]})
    combined = Batch(frames=create_dataframe_same(2),
                     outcomes={'1': [1, 2]},
                     temp_outcomes={'2': [1, 2]})

    total = left + right
    self.assertEqual(combined, total)
def test_should_return_all_frames_when_no_predicate_is_applied(self):
    """A scan whose plan has ``predicate=None`` should pass the batch through.

    The first batch produced by ``SequentialScanExecutor.exec()`` is
    compared against the batch fed in through a ``DummyExecutor`` child.
    """
    frames = create_dataframe(3)

    # (labels, scores) pairs, one per frame.
    detections = [
        (["car", "bus"], [0.5, 0.6]),
        (["bus"], [0.5]),
        (["car", "train"], [0.5, 0.6]),
    ]
    per_frame_outcomes = [
        Outcome(pd.DataFrame({'labels': labels, 'scores': scores}), 'labels')
        for labels, scores in detections
    ]
    source = Batch(frames=frames, outcomes={"test": per_frame_outcomes})

    # Lightweight stand-in for a scan plan: only the attribute set here.
    scan_plan = type("ScanPlan", (), {"predicate": None})
    executor = SequentialScanExecutor(scan_plan)
    executor.append_child(DummyExecutor([source]))

    produced = list(executor.exec())
    first_batch = produced[0]
    self.assertEqual(source, first_batch)
def test_should_return_only_frames_satisfy_predicate(self):
    """A scan with a predicate should keep only the frames it marks True.

    The stub predicate evaluates to ``[False, False, True]``, so the
    executor's first output is expected to equal ``batch[[2]]``.
    """
    frames = create_dataframe(3)

    # (labels, scores) pairs, one per frame.
    detections = [
        (["car", "bus"], [0.5, 0.6]),
        (["bus"], [0.5]),
        (["car", "train"], [0.5, 0.6]),
    ]
    per_frame_outcomes = [
        Outcome(pd.DataFrame({'labels': labels, 'scores': scores}), 'labels')
        for labels, scores in detections
    ]
    source = Batch(frames=frames, outcomes={"test": per_frame_outcomes})

    # Stub expression: ``evaluate`` ignores its argument and returns a mask.
    keep_mask = type("AbstractExpression", (),
                     {"evaluate": lambda x: [False, False, True]})
    scan_plan = type("ScanPlan", (), {"predicate": keep_mask})
    executor = SequentialScanExecutor(scan_plan)
    executor.append_child(DummyExecutor([source]))

    wanted = source[[2]]
    produced = list(executor.exec())
    first_batch = produced[0]
    self.assertEqual(wanted, first_batch)
def test_has_outcomes_returns_true_if_the_given_name_is_in_outcomes(self):
    """``has_outcome`` should be truthy for regular and temp outcome names."""
    subject = Batch(frames=create_dataframe())
    subject.set_outcomes('test_temp', [1], is_temp=True)
    subject.set_outcomes('test', [1])

    found_regular = subject.has_outcome('test')
    self.assertTrue(found_regular)

    found_temp = subject.has_outcome('test_temp')
    self.assertTrue(found_temp)
def test_fetching_frames_by_index_should_also_return_temp_outcomes(self):
    """Indexing with ``[[0]]`` should carry outcomes and temp outcomes along.

    The selection is compared with a batch holding only the first entry
    of both ``'test'`` and ``'test2'``.
    """
    source = Batch(frames=create_dataframe_same(2),
                   outcomes={'test': [[1], [2]]},
                   temp_outcomes={'test2': [[3], [4]]})
    first_only = Batch(frames=create_dataframe(),
                       outcomes={'test': [[1]]},
                       temp_outcomes={'test2': [[3]]})

    picked = source[[0]]
    self.assertEqual(first_only, picked)
def test_should_return_all_frames_when_no_predicate_is_applied(self):
    """With ``predicate`` and ``columns`` both ``None`` the batch is unchanged.

    The first batch produced by ``SequentialScanExecutor.exec()`` is
    compared against the batch supplied via a ``DummyExecutor`` child.
    """
    frames = create_dataframe(3)
    source = Batch(frames=frames)

    # Stand-in plan exposing only the two attributes set here.
    plan_attrs = {"predicate": None, "columns": None}
    scan_plan = type("ScanPlan", (), plan_attrs)

    executor = SequentialScanExecutor(scan_plan)
    executor.append_child(DummyExecutor([source]))

    produced = list(executor.exec())
    first_batch = produced[0]
    self.assertEqual(source, first_batch)
def test_should_return_only_frames_satisfy_predicate(self):
    """``PPExecutor`` should keep only the frames its predicate marks True.

    The stub predicate evaluates to ``[False, False, True]``, so the
    executor's first output is expected to equal ``batch[[2]]``.
    """
    frames = create_dataframe(3)
    source = Batch(frames=frames)

    # Stub expression: ``evaluate`` ignores its argument and returns a mask.
    keep_mask = type("AbstractExpression", (),
                     {"evaluate": lambda x: [False, False, True]})
    pp_plan = type("PPScanPlan", (), {"predicate": keep_mask})

    executor = PPExecutor(pp_plan)
    executor.append_child(DummyExecutor([source]))

    wanted = source[[2]]
    produced = list(executor.exec())
    first_batch = produced[0]
    self.assertEqual(wanted, first_batch)
def test_should_return_only_frames_satisfy_predicate(self):
    """A scan with a ``Batch``-valued predicate mask keeps matching frames.

    The stub predicate returns a ``Batch`` wrapping the boolean column
    ``[False, False, True]``. The expected result is frame 2 of the
    input with its frame index reset via ``reset_index(drop=True)``.
    """
    frames = create_dataframe(3)
    source = Batch(frames=frames)

    # Stub expression: ``evaluate`` ignores its argument and returns a
    # fresh mask batch on every call.
    keep_mask = type(
        "AbstractExpression", (),
        {"evaluate": lambda x: Batch(pd.DataFrame([False, False, True]))})
    scan_plan = type("ScanPlan", (),
                     {"predicate": keep_mask, "columns": None})

    executor = SequentialScanExecutor(scan_plan)
    executor.append_child(DummyExecutor([source]))

    surviving_frames = source[[2]].frames.reset_index(drop=True)
    wanted = Batch(surviving_frames)

    produced = list(executor.exec())
    first_batch = produced[0]
    self.assertEqual(wanted, first_batch)
def test_should_return_all_frames_when_no_predicate_is_applied(self):
    """A predicate-less scan over a ``FrameBatch`` should return it as is.

    One ``Prediction`` is built per row of the frame table and stored
    under the ``"test"`` outcome key.
    """
    frames = create_dataframe(3)

    # (labels, scores) per frame row, in row order.
    # NOTE(review): the second entry pairs one label with two scores,
    # exactly as in the original test data; confirm this is intended.
    detections = [
        (["car", "bus"], [0.5, 0.6]),
        (["bus"], [0.5, 0.6]),
        (["car", "train"], [0.5, 0.6]),
    ]
    predictions = [
        Prediction(frames.iloc[row], labels, scores)
        for row, (labels, scores) in enumerate(detections)
    ]
    source = FrameBatch(frames=frames, outcomes={"test": predictions})

    # Stand-in plan exposing only the attribute set here.
    scan_plan = type("ScanPlan", (), {"predicate": None})
    executor = SequentialScanExecutor(scan_plan)
    executor.append_child(DummyExecutor([source]))

    produced = list(executor.exec())
    first_batch = produced[0]
    self.assertEqual(source, first_batch)
def test_func_expr_with_cmpr_and_const_expr_should_work(self):
    """Compare a function expression's outcomes against the constant "car".

    The function expression yields two outcomes (labels ``car, bus`` and
    ``bus``). A ``COMPARE_EQUAL`` comparison with ``"car"`` is expected
    to evaluate to ``[True, False]``.
    """
    frames = create_dataframe(2)

    with_car = Outcome(
        pd.DataFrame({'labels': ["car", "bus"], 'scores': [0.5, 0.6]}),
        'labels')
    bus_only = Outcome(
        pd.DataFrame({'labels': ["bus"], 'scores': [0.6]}),
        'labels')

    # The wrapped callable ignores its input and returns a new list of
    # the two fixed outcomes on each call.
    lhs = FunctionExpression(lambda x: [with_car, bus_only])
    rhs = ConstantValueExpression("car")
    comparison = ComparisonExpression(
        ExpressionType.COMPARE_EQUAL, lhs, rhs)

    source = Batch(frames=frames)
    verdicts = comparison.evaluate(source)
    self.assertEqual([True, False], verdicts)
def test_should_return_projected_columns(self):
    """A scan with ``columns`` set should return only the projected column.

    The single stub column expression wraps the input's ``'data'``
    column in a new ``Batch``; the executor's first output is compared
    against a batch built from that same column.
    """
    frames = create_dataframe(3)
    source = Batch(frames=frames)
    wanted = Batch(frames=pd.DataFrame(frames['data']))

    # Stub column expression: projects the 'data' column of its input.
    data_column = type(
        "AbstractExpression", (),
        {"evaluate": lambda x: Batch(pd.DataFrame(x.frames['data']))})
    projection = [data_column]
    scan_plan = type("ScanPlan", (),
                     {"predicate": None, "columns": projection})

    executor = SequentialScanExecutor(scan_plan)
    executor.append_child(DummyExecutor([source]))

    produced = list(executor.exec())
    first_batch = produced[0]
    self.assertEqual(wanted, first_batch)
def test_should_return_empty_dataframe(self):
    """A default ``Batch()`` should equal one built from ``create_dataframe(0)``."""
    default_batch = Batch()
    zero_frame_batch = Batch(create_dataframe(0))
    self.assertEqual(default_batch, zero_frame_batch)
def test_slicing_should_work_with_skip_value(self):
    """A step-2 slice should keep rows 0 and 2 together with their outcomes."""
    source = Batch(frames=create_dataframe(3),
                   outcomes={'test': [[None], [None], [None]]})

    # Rows 0 and 2 are what ``[::2]`` selects from three frames.
    kept_frames = create_dataframe(3).iloc[[0, 2], :]
    every_other = Batch(frames=kept_frames,
                        outcomes={'test': [[None], [None]]})

    strided = source[::2]
    self.assertEqual(every_other, strided)
def test_slicing_should_work_with_skip_value(self):
    """A step-2 slice of a three-frame batch should keep rows 0 and 2."""
    source = Batch(frames=create_dataframe(3))

    # Rows 0 and 2 are what ``[::2]`` selects from three frames.
    kept_frames = create_dataframe(3).iloc[[0, 2], :]
    every_other = Batch(frames=kept_frames)

    strided = source[::2]
    self.assertEqual(every_other, strided)
def test_should_return_correct_length(self):
    """``len`` of a batch built from ``create_dataframe(5)`` should be 5."""
    five_frames = Batch(create_dataframe(5))
    size = len(five_frames)
    self.assertEqual(5, size)
def test_slicing_should_word_for_negative_stop_value(self):
    """``[:-1]`` should drop the last frame and its outcome entry."""
    source = Batch(frames=create_dataframe(2),
                   outcomes={'test': [[None], [None]]})
    all_but_last = Batch(frames=create_dataframe(),
                         outcomes={'test': [[None]]})

    trimmed = source[:-1]
    self.assertEqual(all_but_last, trimmed)
def test_get_outcome_from_non_existing_udf_name_returns_empty_list(self):
    """``get_outcomes_for`` on a name never set should return ``[]``."""
    subject = Batch(frames=create_dataframe())
    missing = subject.get_outcomes_for('test')
    self.assertEqual([], missing)
def test_return_only_frames_specified_in_the_indices(self):
    """Indexing a two-frame batch with ``[[0]]`` should give the first frame.

    The selection is compared with a batch built from
    ``create_dataframe()``.
    """
    source = Batch(frames=create_dataframe(2))
    first_only = Batch(frames=create_dataframe())

    picked = source[[0]]
    self.assertEqual(first_only, picked)
def test_has_outcomes_returns_false_if_the_given_name_not_in_outcomes(
        self):
    """``has_outcome`` should be falsy for a name that was never set."""
    subject = Batch(frames=create_dataframe())
    found = subject.has_outcome('temp')
    self.assertFalse(found)
def test_slicing_should_word_for_negative_stop_value(self):
    """``[:-1]`` on a two-frame batch should leave only the first frame."""
    source = Batch(frames=create_dataframe(2))
    all_but_last = Batch(frames=create_dataframe())

    trimmed = source[:-1]
    self.assertEqual(all_but_last, trimmed)
def test_adding_to_empty_frame_batch_returns_itself(self):
    """Adding a populated batch to an empty one should equal the populated one.

    The left operand wraps an empty ``pd.DataFrame()``; the right one has
    frames plus an outcome under key ``'1'``.
    """
    empty = Batch(frames=pd.DataFrame())
    populated = Batch(frames=create_dataframe(), outcomes={'1': [1]})

    total = empty + populated
    self.assertEqual(populated, total)
def test_add_should_get_new_batch_frame_with_addition_no_outcomes(self):
    """Adding two outcome-less batches should equal the combined frames.

    The sum is compared with a batch built from
    ``create_dataframe_same(2)``.
    """
    left = Batch(frames=create_dataframe())
    right = Batch(frames=create_dataframe())
    combined = Batch(frames=create_dataframe_same(2))

    total = left + right
    self.assertEqual(combined, total)
def test_add_should_raise_error_for_incompatible_type(self):
    """Adding an ``int`` to a batch should raise ``TypeError``."""
    subject = Batch(frames=create_dataframe())
    with self.assertRaises(TypeError):
        # Keep the ``+`` operator form: the test targets the operator,
        # not a direct method call. The result is deliberately unused.
        _ = subject + 1
def test_batch_from_json(self):
    """A batch should survive a ``to_json`` / ``from_json`` round trip."""
    original = Batch(frames=create_dataframe(), identifier_column='id')

    serialized = original.to_json()
    restored = Batch.from_json(serialized)

    self.assertEqual(original, restored)
def test_fetching_frames_by_index(self):
    """Indexing with ``[[0]]`` should return a batch of the first frame.

    The source is built from ``create_dataframe_same(2)`` and the
    selection is compared with a batch built from ``create_dataframe()``.
    """
    source = Batch(frames=create_dataframe_same(2))
    first_only = Batch(frames=create_dataframe())

    picked = source[[0]]
    self.assertEqual(first_only, picked)
def test_set_outcomes_method_should_set_the_predictions_with_udf_name(
        self):
    """Values stored by ``set_outcomes`` should come back from ``get_outcomes_for``."""
    subject = Batch(frames=create_dataframe())
    subject.set_outcomes('test', [None])

    stored = subject.get_outcomes_for('test')
    self.assertEqual([None], stored)
def test_slicing_on_batched_should_return_new_batch_frame(self):
    """Slice a two-frame batch (no outcomes) and compare the results.

    ``[:]`` must compare equal to the source batch, and ``[:-1]`` must
    compare equal to a batch built from ``create_dataframe()``.
    """
    source = Batch(frames=create_dataframe(2))
    all_but_last = Batch(frames=create_dataframe())

    # Full slice: expected to be equal to the batch it came from.
    full_copy = source[:]
    self.assertEqual(source, full_copy)

    # Negative stop: the last frame is dropped.
    trimmed = source[:-1]
    self.assertEqual(all_but_last, trimmed)