Example #1
def __init__(self, total_probability=1):
    self.__pricer = PriceHandler()
    self.__metadata = MetadataHandler()
    self.__total_probability = total_probability
    self.__remaining_probability = total_probability
    self.__fixed_probabilities = {}
    self.__transformed_data = {}
    self.__items = None
    self.__extract_time = None
    self.__closed_indicator = None
Example #2
def test_calc_probability():
    GIVEN("a price handler and a price")
    handler = PriceHandler()
    price = 2

    WHEN("we calculate the associated probability")
    probability = handler.calc_probability(price=price)

    THEN("the correct probability is returned")
    assert probability == 0.5
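
The assertion above pins `calc_probability` down as the reciprocal of the price. The real `PriceHandler` source is not shown in these examples, so the following is a minimal sketch inferred from the test, not the actual implementation:

class PriceHandler:
    def calc_probability(self, price):
        # Sketch only: the implied probability of a decimal price is its
        # reciprocal, e.g. a price of 2 maps to a probability of 0.5.
        return 1 / price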
Example #3
def test_calc_discounted_probability():
    GIVEN("a price handler and a price")
    handler = PriceHandler()
    price = 2
    commission_on_profit = 0.05
    profit = price - 1

    WHEN("we calculate the probability of the price minus the commission")
    probability = handler.calc_discounted_probability(price=price)

    THEN("the correct probability is returned")
    assert probability == 1 / ((profit * (1 - commission_on_profit)) + 1)
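
Example #3's assertion implies that `calc_discounted_probability` deducts commission from the profit portion of the price and then takes the reciprocal. A sketch consistent with the test, assuming the 5% rate is a constant inside the handler (that detail is a guess read off the test):

class PriceHandler:
    COMMISSION_ON_PROFIT = 0.05  # assumed rate, taken from the test above

    def calc_discounted_probability(self, price):
        # Commission applies to the profit (price - 1); add the unit
        # stake back, then take the reciprocal of the discounted price.
        return 1 / ((price - 1) * (1 - self.COMMISSION_ON_PROFIT) + 1)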
Example #4
def test_remove_commission():
    GIVEN("a price and the returns handler")
    handler = PriceHandler()
    prices = [2.1, 1.05, 95, 75.1, 4.6, 9.1]

    for price in prices:
        WHEN("we calculate the return of the offered price")
        price_minus_commission = handler.remove_commission(price=price)
        commission_on_profit = 0.05
        profit = price - 1

        THEN("the correct price is returned")
        assert price_minus_commission == (profit * (1 - commission_on_profit)) + 1
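
Example #4 tests the discounted price directly, which suggests `remove_commission` is the shared building block: under the sketches above, `calc_discounted_probability(price)` would simply be `1 / remove_commission(price)`. A hedged sketch with the same assumed 5% rate:

class PriceHandler:
    COMMISSION_ON_PROFIT = 0.05  # assumed, as above

    def remove_commission(self, price):
        # Net price after deducting commission from the profit portion;
        # the unit stake itself is not subject to commission.
        return (price - 1) * (1 - self.COMMISSION_ON_PROFIT) + 1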
Example #5
def __calc_compositional_data(items, correct_probability):
    expected_data = make_copy(items)
    pricer = PriceHandler()
    total_probability = 0

    for item in expected_data:
        item["probability"] = pricer.calc_discounted_probability(
            item.get("price"))
        total_probability += item.get("probability")

    for item in expected_data:
        compositional_probability = (
            correct_probability / total_probability) * item.get("probability")
        item["compositional_probability"] = compositional_probability
        item["compositional_price"] = pricer.calc_price(
            compositional_probability)

    return expected_data
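
Example #5 spells out the compositional arithmetic: raw discounted probabilities are rescaled so they sum to `correct_probability`, then each rescaled probability is converted back to a price. The `ProbabilityHandler` used in Example #6 is not shown; a minimal sketch consistent with this arithmetic and with its constructor call (`items`, `name`, `correct_probability`) could be:

class ProbabilityHandler:
    # Sketch only: inferred from Example #5 and the constructor call in
    # Example #6, not taken from the real module.
    def __init__(self, items, name, correct_probability):
        self.items = items
        self.name = name
        self.correct_probability = correct_probability

    def calc_compositional_probabilities(self):
        # Rescale the raw probabilities so that they sum to
        # correct_probability while preserving their relative sizes.
        total = sum(item.get(self.name) for item in self.items)
        for item in self.items:
            item["compositional_probability"] = (
                self.correct_probability / total
            ) * item.get(self.name)
        return self.items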
Example #6
class TransformHandler:
    def __init__(self, total_probability=1):
        self.__pricer = PriceHandler()
        self.__metadata = MetadataHandler()
        self.__total_probability = total_probability
        self.__remaining_probability = total_probability
        self.__fixed_probabilities = {}
        self.__transformed_data = {}
        self.__items = None
        self.__extract_time = None
        self.__closed_indicator = None

    def set_probability(self, runner_id, probability):
        self.__fixed_probabilities[runner_id] = probability

    def get_fixed_probability_ids(self):
        return list(self.__fixed_probabilities.keys())

    def process(self, extracted_data=None):
        # Use None rather than a mutable default argument.
        extracted_data = extracted_data or {}
        items = extracted_data.get("items") or []
        extract_time = extracted_data.get("extract_time")
        closed_indicator = extracted_data.get("closed_indicator")

        self.__transformed_data = {}
        self._set_items(items=items)
        self._set_extract_time(extract_time=extract_time)
        self._set_closed_indicator(closed_indicator=closed_indicator)
        self._calc_remaining_probability()

        if self.__is_valid_record():
            self.__add_extract_time()
            self.__add_closed_indicator()
            self.__add_default_data()
            self.__add_adj_back_prices()
            self.__add_combined_back_size()
            self.__add_compositional_sp_data()
            self.__add_compositional_ex_data()
            self.__add_market_back_size()

        return self.__transformed_data

    def _set_items(self, items):
        self.__items = self._exclude_fixed_items(items=items)

    def _exclude_fixed_items(self, items):
        return list(
            filter(
                lambda item: item.get("id") not in self.get_fixed_probability_ids(),
                items,
            )
        )

    def _set_extract_time(self, extract_time):
        self.__extract_time = extract_time

    def _set_closed_indicator(self, closed_indicator):
        self.__closed_indicator = closed_indicator

    def _calc_remaining_probability(self):
        self.__remaining_probability = self.__total_probability - sum(
            self.__fixed_probabilities.values()
        )

    def __is_valid_record(self):
        return self.__items and self.__extract_time is not None

    def __add_extract_time(self):
        self.__transformed_data[("extract_time", "")] = [self.__extract_time]

    def __add_closed_indicator(self):
        self.__transformed_data[("closed_indicator", "")] = [
            self.__closed_indicator and self.__extract_time >= 0
        ]

    def __add_default_data(self):
        for column in self.__metadata.get_required_variables():
            for item in self.__items:
                self.__transformed_data[
                    self.__get_composite_column_name(variable=column, item=item)
                ] = [item.get(column)]

    def __add_adj_back_prices(self):
        for column in self.__metadata.get_back_prices():
            for item in self.__items:
                self.__transformed_data[
                    self.__get_composite_column_name(
                        variable=(
                            column + self.__metadata.get_minus_commission_suffix()
                        ),
                        item=item,
                    )
                ] = [self.__pricer.remove_commission(item.get(column))]

    def __add_combined_back_size(self):
        for item in self.__items:
            combined_back_size = sum(
                item.get(size) for size in self.__metadata.get_back_sizes()
            )
            self.__transformed_data[
                self.__get_composite_column_name(
                    variable="combined_back_size", item=item
                )
            ] = [combined_back_size]

    def __add_compositional_sp_data(self):
        self.__add_compositional_data(name="sp")

    def __add_compositional_ex_data(self):
        self.__add_compositional_data(name="ex_average")

    def __add_compositional_data(self, name):
        compositional_data = self._get_compositional_data(
            price_name=(name + "_back_price")
        )

        for item in compositional_data:
            self.__transformed_data[
                self.__get_composite_column_name(
                    variable="compositional_" + name + "_probability", item=item
                )
            ] = [item.get("compositional_probability")]

            self.__transformed_data[
                self.__get_composite_column_name(
                    variable="compositional_" + name + "_back_price", item=item
                )
            ] = [item.get("compositional_price")]

    def __add_market_back_size(self):
        self.__transformed_data[("market_back_size", "")] = [
            sum(
                item.get(size)
                for size in self.__metadata.get_back_sizes()
                for item in self.__items
            )
        ]

    def __get_composite_column_name(self, variable, item):
        return (variable, item.get("id"))

    def _get_compositional_data(self, price_name):
        probabilities = list(
            map(
                lambda item: self.__calc_initial_probability(
                    item=item, price_name=price_name
                ),
                self.__items,
            )
        )

        probability_handler = ProbabilityHandler(
            items=probabilities,
            name="probability",
            correct_probability=self.__remaining_probability,
        )
        compositional_probabilities = (
            probability_handler.calc_compositional_probabilities()
        )

        compositional_data = list(
            map(
                self.__add_compositional_price,
                compositional_probabilities,
            )
        )

        return compositional_data

    def __calc_initial_probability(self, item, price_name):
        probability = {
            "id": item.get("id"),
            "probability": self.__pricer.calc_discounted_probability(
                item.get(price_name)
            ),
        }
        return probability

    def __add_compositional_price(self, item):
        item["compositional_price"] = self.__pricer.calc_price(
            item.get("compositional_probability")
        )
        return item
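
For orientation, here is a hypothetical call to `process`. The item keys are inferred from the column names asserted in Example #7 (`sp_back_price`, `ex_average_back_price`, `ex_offered_back_price`, `sp_back_size`, `ex_back_size`); the real set comes from `MetadataHandler`, so the shape below is an assumption:

handler = TransformHandler(total_probability=1)
transformed = handler.process(
    extracted_data={
        "extract_time": -300,  # assumed: seconds relative to market start
        "closed_indicator": False,
        "items": [
            {
                "id": 123,
                "sp_back_price": 2.0,
                "ex_average_back_price": 2.1,
                "ex_offered_back_price": 2.2,
                "sp_back_size": 100.0,
                "ex_back_size": 50.0,
            },
            {
                "id": 456,
                "sp_back_price": 3.0,
                "ex_average_back_price": 2.9,
                "ex_offered_back_price": 3.1,
                "sp_back_size": 80.0,
                "ex_back_size": 40.0,
            },
        ],
    },
)
# Keys of the result are (variable, id) tuples, e.g. ("extract_time", "")
# or ("compositional_sp_back_price", 123); each value is a one-entry list.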
Example #7
def test_fixed_probability(mock_notify):
    GIVEN("a data handler and the directory and file name of a test file")

    directory = "./data/29451865"
    file_name = "1.162069495.txt"
    file = HistoricalExternalAPIFileHander(directory=directory, file=file_name)
    file_data = file.get_file_as_list()
    market_start_time = file.get_market_start_time()

    number_runners = __get_number_runners(data=file_data)
    unfixed_items = number_runners
    fixed_items = 0
    adapter = ExternalAPIMarketRecordAdapter(
        market_start_time=market_start_time)
    pricer = PriceHandler()
    metadata = MetadataHandler()
    mediator = MockMediator()
    correct_probability = 1

    number_records_processed = 0

    WHEN("we feed the data into a handler one record at a time")
    handler = DataHandler(
        mediator=mediator,
        adapter=adapter,
        container=DataContainer(),
    )
    for i, record in enumerate(file_data):
        number_records_processed = i + 1
        if number_records_processed % 10 == 0:
            WHEN("we randomly fix the probability of an item")
            id_to_fix = handler._get_ids_for_model_data()[0]
            fixed_probability = round(
                handler._container.get_last_column_entry(
                    name=("compositional_sp_probability", id_to_fix)),
                4,
            )
            handler._set_probability(runner_id=id_to_fix,
                                     probability=fixed_probability)
            correct_probability -= fixed_probability
            unfixed_items -= 1
            fixed_items += 1

        fixed_probability_ids = handler._get_fixed_probability_ids()
        THEN("the list of fixed probability ids is the correct length")
        assert len(fixed_probability_ids) == fixed_items

        handler.process_data(record)

        THEN("the handler's data has the correct number of records")
        assert handler._container.get_row_count() == number_records_processed

        THEN(
            "the mediator's notify method was called with the correct parameters"
        )
        model_data = handler._get_model_data()
        args, kwargs = mock_notify.call_args
        assert args == ()
        assert kwargs.get("data") == model_data
        assert kwargs.get("event") == "data added to container"

        THEN(
            "there is a record in the model data for each of the unfixed items"
        )
        assert len(model_data) == unfixed_items

        test_record = {
            each.get("id"): each
            for each in adapter.convert(record).get("items")
        }
        total_sp_probability = 0
        total_ex_probability = 0

        for data in model_data:
            THEN("each of the items in the model data has an non-zero id")
            runner_id = data.get("id")
            assert isinstance(runner_id, int)
            assert runner_id > 0

            THEN("the items probability has not been fixed")
            assert runner_id not in fixed_probability_ids

            test_item = test_record.get(runner_id)

            THEN("the data has the correct combined_back_size")
            combined_back_size = data.get("combined_back_size" +
                                          metadata.get_point_in_time_suffix())
            assert combined_back_size == (test_item.get("sp_back_size") +
                                          test_item.get("ex_back_size"))

            THEN(
                "the data contains the compositional sp probability which is between 0 and 1"
            )
            compositional_sp_probability = data.get(
                "compositional_sp_probability" +
                metadata.get_point_in_time_suffix())
            total_sp_probability += compositional_sp_probability
            assert 1 > compositional_sp_probability > 0

            THEN(
                "the data contains the compositional ex probability which is between 0 and 1"
            )
            compositional_ex_average_probability = data.get(
                "compositional_ex_average_probability" +
                metadata.get_point_in_time_suffix())
            total_ex_probability += compositional_ex_average_probability
            assert 1 > compositional_ex_average_probability > 0

            THEN("the data contains the correct offered price")
            offered_price = data.get("ex_offered_back_price" +
                                     metadata.get_point_in_time_suffix())
            assert offered_price > 0
            assert offered_price == test_item.get("ex_offered_back_price")

            THEN("the data contains the correct returns price")
            returns_price = data.get("ex_offered_back_price_mc" +
                                     metadata.get_point_in_time_suffix())
            assert returns_price > 0
            assert returns_price == pricer.remove_commission(
                test_item.get("ex_offered_back_price"))

            THEN(
                "the sp back price time series data returned is of the correct length"
            )
            compositional_sp_back_price_ts = (
                data.get("compositional_sp_back_price" +
                         metadata.get_time_series_suffix()) or [])
            assert len(
                compositional_sp_back_price_ts) == number_records_processed
            THEN(
                "the last record of the time series data matches the probability"
            )
            assert almost_equal(compositional_sp_back_price_ts[-1],
                                1 / compositional_sp_probability)

            THEN(
                "the extract time time series data returned is of the correct length"
            )
            extract_time_ts = (data.get("extract_time" +
                                        metadata.get_time_series_suffix())
                               or [])
            assert len(extract_time_ts) == number_records_processed
            for j, extract_time in enumerate(extract_time_ts):
                if j > 0:
                    THEN("the times in the series are ascending")
                    assert extract_time > extract_time_ts[j - 1]

            THEN(
                "the combined back size time series data returned is of the correct length"
            )
            combined_back_size_ts = (
                data.get("combined_back_size" +
                         metadata.get_time_series_suffix()) or [])
            assert len(combined_back_size_ts) == number_records_processed
            THEN(
                "the last entry in the time series is the same as point in time combined_back_size"
            )
            assert combined_back_size_ts[-1] == combined_back_size
            for j, size in enumerate(combined_back_size_ts):
                if j > 0:
                    THEN("the sizes in the series are non-decreasing")
                    assert size >= combined_back_size_ts[j - 1]

        THEN("the total ex and sp probabilities from the model_data sum to 1")
        assert almost_equal(total_sp_probability, correct_probability)
        assert almost_equal(total_ex_probability, correct_probability)

    WHEN("we have finished")
    THEN("the data container has the correct number of columns")
    assert handler._container.get_column_count() == __get_number_columns(
        number_runners)
    THEN("the data container has the same number of records as the raw data")
    assert handler._container.get_row_count() == len(file_data)
    THEN("the correct number of runners are contained in the object")
    assert len(handler.get_unique_ids()) == number_runners
    THEN(
        "the correct number of fixed probabilities are contained in the object"
    )
    assert len(handler._get_fixed_probability_ids()) == round_down(
        number_records_processed / 10)
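
The helpers `almost_equal` and `round_down` are not defined in these examples; plausible definitions consistent with how the test uses them (tolerant float comparison, and flooring the record count divided by ten) would be:

import math

def almost_equal(a, b):
    # Assumed helper: tolerant float comparison for the probability
    # and price assertions above.
    return math.isclose(a, b, rel_tol=1e-9, abs_tol=1e-9)

def round_down(value):
    # Assumed helper: Example #7 fixes one probability every 10 records,
    # so e.g. 25 records processed should yield 2 fixed probabilities.
    return math.floor(value)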