Example #1
def test_empty_input():
    GIVEN("a empty input dictionary and a record adapter")
    data = {}
    adapter = ExternalAPIMarketRecordAdapter(
        market_start_time="2019-01-01T00:00:00.000Z")

    WHEN("we convert the input")
    none = adapter.convert(data)

    THEN("None is returned")
    assert none is None
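
Note: the GIVEN/WHEN/THEN calls used throughout these examples are not pytest built-ins; a minimal sketch of such labelling helpers, assuming they do nothing more than annotate the test output (the project's real implementations may differ):

def GIVEN(description):
    # Hypothetical stub: label the setup step so test output reads as a spec.
    print(f"GIVEN {description}")

def WHEN(description):
    # Hypothetical stub: label the action under test.
    print(f"WHEN {description}")

def THEN(description):
    # Hypothetical stub: label the expected outcome ahead of its assert.
    print(f"THEN {description}")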
Example #2
def test_valid_record():
    GIVEN("a valid input dictionary and a record adapter")
    data = __get_data()
    adapter = ExternalAPIMarketRecordAdapter(
        market_start_time="2019-01-13T07:05:00.000Z")

    WHEN("we convert the input")
    adapted_data = adapter.convert(data)
    number_items = len(data.get("runners"))

    THEN("the correct number of items are returned")
    assert len(adapted_data.get("items") or []) == number_items
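
Note: __get_data is a helper defined elsewhere in the test module. Below is a hypothetical stand-in consistent with the fields these tests touch (a runners list keyed by selectionId, plus the marketId and process_time seen in Example #3); the real fixture presumably carries full price data:

def __get_data():
    # Hypothetical fixture only; the real helper presumably returns a complete
    # market record including sp/ex price data for each runner.
    return {
        "process_time": "2019-01-13T07:00:04Z",
        "marketId": "1.153509934",
        "runners": [
            {"selectionId": 12345},
            {"selectionId": 23456},
            {"selectionId": 34567},
        ],
    }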
Example #3
def test_missing_market_info():
    GIVEN("a empty input dictionary and a record adapter")
    data = {
        "process_time": "2019-01-13T05:20:04Z",
        "marketId": "1.153509934",
    }
    adapter = ExternalAPIMarketRecordAdapter(
        market_start_time="2019-01-13T05:25:00.000Z")

    WHEN("we convert the input")
    none = adapter.convert(data)

    THEN("None is returned")
    assert none is None
Example #4
def test_get_ids_for_model_data():
    GIVEN("a data handler with some data and two fixed probabilities")
    GIVEN("a data handler and the directory and file name of a test file")
    directory = "./data/29184567"
    file_name = "1.156230797.txt"
    file = HistoricalExternalAPIFileHander(directory=directory, file=file_name)
    record = file.get_file_as_list()[0]
    market_start_time = file.get_market_start_time()

    adapter = ExternalAPIMarketRecordAdapter(
        market_start_time=market_start_time)
    mediator = MockMediator()

    handler = DataHandler(
        mediator=mediator,
        adapter=adapter,
        container=DataContainer(),
    )
    handler.process_data(record)

    WHEN(
        "we set the probabilities of two items and get the ids required for the next model run"
    )
    ids = handler.get_unique_ids()
    for runner_id in ids[0:2]:  # slice copy, so popping from ids below is safe
        handler._set_probability(runner_id=runner_id, probability=0.1)
        ids.pop(0)

    THEN("the list omits the items which have fixed probabilities")
    model_ids = handler._get_ids_for_model_data()
    assert model_ids == ids
    assert len(model_ids) < len(handler.get_unique_ids())
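
Note: this test pins down the contract of _get_ids_for_model_data. A minimal sketch of the filtering it implies, assuming the handler tracks fixed runners as exercised in Example #9 (an illustration, not the project's actual implementation):

    def _get_ids_for_model_data(self):
        # Assumed behavior: every unique runner id except those whose
        # probability has been fixed via _set_probability.
        fixed_ids = self._get_fixed_probability_ids()
        return [
            runner_id for runner_id in self.get_unique_ids()
            if runner_id not in fixed_ids
        ]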
Example #5
def test_mostly_valid_record():
    GIVEN(
        "a mostly valid input dictionary (with two invalid items) and a record adapter"
    )
    data = __get_data()
    for i in range(2):
        del data["runners"][i]["selectionId"]
    adapter = ExternalAPIMarketRecordAdapter(
        market_start_time="2019-01-13T07:05:00.000Z")

    WHEN("we convert the input")
    adapted_data = adapter.convert(data)
    number_items = len(data.get("runners")) - 2

    THEN("the correct number of items are returned")
    assert len(adapted_data.get("items") or []) == number_items
Example #6
    def __init__(
        self,
        market_id,
        external_api,
        market_start_time,
        data_adapter=None,
        bank=5000,
        data=None,
        models=None,
        orders=None,
    ):

        self.__market_id = market_id

        self.external_api: Colleague = external_api

        adapter = data_adapter or ExternalAPIMarketRecordAdapter(
            market_start_time=market_start_time)
        self.data: Colleague = data or DataHandler(
            mediator=self,
            adapter=adapter,
            container=DataContainer(),
        )

        self.models: Colleague = models or ModelHandler(
            mediator=self, wls_model=WeightedLinearRegression())

        self.orders: Colleague = orders or OrdersHandler(mediator=self,
                                                         bank=bank)

        # Map mediator events to the colleague callbacks that handle them.
        self.__recipients = {
            "external data fetched": self.data.process_data,
            "data added to container": self.models.run_models,
            "models have results": self.orders.get_new_orders,
            "new orders": self.external_api.post_order,
            "orders posted": self.__delegate_posted_orders,
            "market closed": self.__exit,
            "no data provided multiple times": self.__exit,
            "finished processing": self.__finished,
        }
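
Note: the __recipients mapping wires each mediator event to a colleague callback. A minimal sketch of the notify method such a mediator would expose, assuming the event/data keyword signature that Example #9 asserts on via mock_notify.call_args:

    def notify(self, event=None, data=None):
        # Dispatch: look up the callback registered for the event and
        # forward the payload if one was supplied.
        recipient = self.__recipients.get(event)
        if recipient is None:
            return
        if data is not None:
            recipient(data)
        else:
            recipient()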
Example #7
def test_confirm_market_closed():
    GIVEN("a data handler and the directory and file name of a test file")
    adapter = ExternalAPIMarketRecordAdapter(
        market_start_time="2019-01-01T00:00:00.000Z")
    mediator = MockMediator()

    handler = DataHandler(mediator=mediator,
                          adapter=adapter,
                          container=DataContainer())

    WHEN("we check if the market is closed")
    closed = handler._confirm_market_closed()
    THEN("it is not")
    assert not closed

    GIVEN("the handler's container has the required column" +
          " but it does not indicate that the market is closed")
    closed_record = handler._container.new(
        data={("closed_indicator", ""): [0]})
    handler._container.add_rows(container=closed_record)

    WHEN("we check if the market is closed")
    closed = handler._confirm_market_closed()
    THEN("it is not")
    assert not closed

    GIVEN(
        "the handler's container has the required column indicating that the market is closed"
    )
    closed_record = handler._container.new(
        data={("closed_indicator", ""): [1]})
    handler._container.add_rows(container=closed_record)

    WHEN("we check if the market is closed")
    closed = handler._confirm_market_closed()
    THEN("it is")
    assert closed
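
Note: a sketch of the behavior this test pins down, assuming _confirm_market_closed reads the latest closed_indicator entry via the container's get_last_column_entry (used in Example #9) and treats a missing column as "not closed":

    def _confirm_market_closed(self):
        # Assumed behavior: closed only if a closed_indicator column exists
        # and its most recent entry is truthy.
        try:
            indicator = self._container.get_last_column_entry(
                name=("closed_indicator", ""))
        except KeyError:  # assumption: a missing column raises KeyError
            return False
        return bool(indicator)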
Example #8
def test_removed_runner():
    GIVEN(
        "the directory and file name of a test file which contains a removed runner"
    )
    directory = "./data/29201704"
    file_name = "1.156695742.txt"
    file = HistoricalExternalAPIFileHander(directory=directory, file=file_name)
    file_data = file.get_file_as_list()
    market_start_time = file.get_market_start_time()
    adapter = ExternalAPIMarketRecordAdapter(
        market_start_time=market_start_time)
    number_runners = __get_number_runners(data=file_data)
    mediator = MockMediator()

    WHEN("we feed the data into a handler one record at a time")

    handler = DataHandler(
        mediator=mediator,
        adapter=adapter,
        container=DataContainer(),
    )
    for i, record in enumerate(file_data):
        handler.process_data(record)
        THEN("the incoming record was processed")
        number_records_processed = i + 1
        THEN("the data container the correct number of records")
        assert handler._container.get_row_count() == number_records_processed

    WHEN("we have finished")
    THEN("the data container has the correct number of columns")
    assert handler._container.get_column_count() == __get_number_columns(
        number_runners)
    THEN("the data container has the same number of records as the raw data")
    assert handler._container.get_row_count() == len(file_data)
    THEN("the correct number of runners are contained in the object")
    assert len(handler.get_unique_ids()) == number_runners
Example #9
def test_fixed_probability(mock_notify):
    GIVEN("a data handler and the directory and file name of a test file")

    directory = "./data/29451865"
    file_name = "1.162069495.txt"
    file = HistoricalExternalAPIFileHander(directory=directory, file=file_name)
    file_data = file.get_file_as_list()
    market_start_time = file.get_market_start_time()

    number_runners = __get_number_runners(data=file_data)
    unfixed_items = number_runners
    fixed_items = 0
    adapter = ExternalAPIMarketRecordAdapter(
        market_start_time=market_start_time)
    pricer = PriceHandler()
    metadata = MetadataHandler()
    mediator = MockMediator()
    correct_probability = 1  # probability mass remaining for unfixed runners

    number_records_processed = 0

    WHEN("we feed the data into a handler one record at a time")
    handler = DataHandler(
        mediator=mediator,
        adapter=adapter,
        container=DataContainer(),
    )
    for i, record in enumerate(file_data):
        number_records_processed = i + 1
        if number_records_processed % 10 == 0:
            WHEN("we randomly fix the probability of an item")
            id_to_fix = handler._get_ids_for_model_data()[0]
            fixed_probability = round(
                handler._container.get_last_column_entry(
                    name=("compositional_sp_probability", id_to_fix)),
                4,
            )
            handler._set_probability(runner_id=id_to_fix,
                                     probability=fixed_probability)
            correct_probability -= fixed_probability
            unfixed_items -= 1
            fixed_items += 1

        fixed_probability_ids = handler._get_fixed_probability_ids()
        THEN("the list of fixed probability ids is the correct length")
        assert len(fixed_probability_ids) == fixed_items

        handler.process_data(record)

        THEN("the handler's data has the correct number of records")
        assert handler._container.get_row_count() == number_records_processed

        THEN(
            "the mediator's notify method was called with the correct parameters"
        )
        model_data = handler._get_model_data()
        args, kwargs = mock_notify.call_args
        assert args == ()
        assert kwargs.get("data") == model_data
        assert kwargs.get("event") == "data added to container"

        THEN(
            "there is a record in the model data for each of the unfixed items"
        )
        assert len(model_data) == unfixed_items

        test_record = {
            each.get("id"): each
            for each in adapter.convert(record).get("items")
        }
        total_sp_probability = 0
        total_ex_probability = 0

        for data in model_data:
            THEN("each of the items in the model data has an non-zero id")
            runner_id = data.get("id")
            assert isinstance(runner_id, int)
            assert runner_id > 0

            THEN("the items probability has not been fixed")
            assert runner_id not in fixed_probability_ids

            test_item = test_record.get(runner_id)

            THEN("the data has the correct combined_back_size")
            combined_back_size = data.get("combined_back_size" +
                                          metadata.get_point_in_time_suffix())
            assert combined_back_size == (test_item.get("sp_back_size") +
                                          test_item.get("ex_back_size"))

            THEN(
                "the data contains the compositional sp probability which is between 0 and 1"
            )
            compositional_sp_probability = data.get(
                "compositional_sp_probability" +
                metadata.get_point_in_time_suffix())
            total_sp_probability += compositional_sp_probability
            assert 1 > compositional_sp_probability > 0

            THEN(
                "the data contains the compositional ex probability which is between 0 and 1"
            )
            compositional_ex_average_probability = data.get(
                "compositional_ex_average_probability" +
                metadata.get_point_in_time_suffix())
            total_ex_probability += compositional_ex_average_probability
            assert 1 > compositional_ex_average_probability > 0

            THEN("the data contains the correct offered price")
            offered_price = data.get("ex_offered_back_price" +
                                     metadata.get_point_in_time_suffix())
            assert offered_price > 0
            assert offered_price == test_item.get("ex_offered_back_price")

            THEN("the data contains the correct returns price")
            returns_price = data.get("ex_offered_back_price_mc" +
                                     metadata.get_point_in_time_suffix())
            assert returns_price > 0
            assert returns_price == pricer.remove_commission(
                test_item.get("ex_offered_back_price"))

            THEN(
                "the sp back price time series data returned is of the correct length"
            )
            compositional_sp_back_price_ts = (
                data.get("compositional_sp_back_price" +
                         metadata.get_time_series_suffix()) or [])
            assert len(
                compositional_sp_back_price_ts) == number_records_processed
            THEN(
                "the last record of the time series data matches the probability"
            )
            assert almost_equal(compositional_sp_back_price_ts[-1],
                                1 / compositional_sp_probability)

            THEN(
                "the extract_time time series data returned is of the correct length"
            )
            extract_time_ts = (data.get("extract_time" +
                                        metadata.get_time_series_suffix())
                               or [])
            assert len(extract_time_ts) == number_records_processed
            for j, extract_time in enumerate(extract_time_ts):
                if j > 0:
                    THEN("the times in the series are ascending")
                    assert extract_time > extract_time_ts[j - 1]

            THEN(
                "the combined back size time series data returned is of the correct length"
            )
            combined_back_size_ts = (
                data.get("combined_back_size" +
                         metadata.get_time_series_suffix()) or [])
            assert len(combined_back_size_ts) == number_records_processed
            THEN(
                "the last entry in the time series is the same as point in time combined_back_size"
            )
            assert combined_back_size_ts[-1] == combined_back_size
            for j, size in enumerate(combined_back_size_ts):
                if j > 0:
                    THEN("the sizes in the series are ascending")
                    assert size >= combined_back_size_ts[j - 1]

        THEN("the total ex and sp probabilities from the model_data sum to 1")
        assert almost_equal(total_sp_probability, correct_probability)
        assert almost_equal(total_ex_probability, correct_probability)

    WHEN("we have finished")
    THEN("the data container has the correct number of columns")
    assert handler._container.get_column_count() == __get_number_columns(
        number_runners)
    THEN("the data container has the same number of records as the raw data")
    assert handler._container.get_row_count() == len(file_data)
    THEN("the correct number of runners are contained in the object")
    assert len(handler.get_unique_ids()) == number_runners
    THEN(
        "the correct number of fixed probabilities are contained in the object"
    )
    assert len(handler._get_fixed_probability_ids()) == round_down(
        number_records_processed / 10)
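
Note: taken together, the assertions above suggest _set_probability only needs to record the fixed value so the runner drops out of model data and its mass is excluded from the compositional totals. A hypothetical sketch of that bookkeeping (the map name _fixed_probabilities is an assumption):

    def _set_probability(self, runner_id=None, probability=None):
        # Hypothetical bookkeeping: remember the fixed value keyed by runner id.
        self._fixed_probabilities[runner_id] = probability

    def _get_fixed_probability_ids(self):
        # Hypothetical accessor over the same map, as asserted in this test.
        return list(self._fixed_probabilities.keys())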