def test_empty_label(self):
     # An empty string passed next to a real label must not be picked:
     # the only non-empty label, "A1", is expected back.
     self.assertEqual(get_earliest_label("A1", ""), "A1")
 def test_different_rows_cols(self):
     # Three labels that differ in both letter and number: "A2" is the
     # expected winner over "B2" and "C3".
     labels = ("B2", "A2", "C3")
     self.assertEqual(get_earliest_label(*labels), "A2")
 def test_same_labels(self):
     # Identical labels: the shared value itself is expected back.
     self.assertEqual(get_earliest_label("B2", "B2"), "B2")
 def test_same_rows_different_cols(self):
     # Same number part ("4"), different letters: "A4" is expected to win
     # over "D4" and "E4".
     labels = ("A4", "D4", "E4")
     self.assertEqual(get_earliest_label(*labels), "A4")
 def test_different_rows_same_cols(self):
     # "A1" against "B1": "A1" is expected to be reported as the earliest.
     self.assertEqual(get_earliest_label("A1", "B1"), "A1")
 def test_upper_right_earlier_than_left_bottom(self):
     # The number part outranks the letter part: "D2" is expected to beat
     # "B4" even though its letter comes later in the alphabet.
     self.assertEqual(get_earliest_label("B4", "D2"), "D2")
Example #7
0
    def purchases(self) -> List[Purchase]:
        """
        Pair every recognized good with its price and build the Purchases.

        Goods and prices are walked in the order their mappings yield them.

        * With exactly as many prices as goods they are paired one-to-one,
          so a price that recognition shifted up or down still lands on its
          own good.
        * With more prices than goods, a good takes at least one price and
          keeps taking (and summing) prices until ``get_earliest_label``
          picks the next good's label over the next price's label. The last
          good takes whatever is left.

        A single good with no recognized price at all falls back to the
        subtotal, then the total, then the actually paid amount.

        :return list: list of Purchases, one per good
        :raises ValueError: if there are fewer prices than goods (the
            single-good fallback above excepted)
        """
        goods = copy(self.goods)
        prices = copy(self.goods_prices)

        if not prices and len(goods) == 1:
            # no price was recognized for the only good: use a receipt-level
            # amount, stored under a synthetic label in the price column
            fallback_label = f"{self.PRICE_COLUMN}3"
            prices = {
                fallback_label:
                self.subtotal or self.total or self.actually_paid
            }
        elif len(prices) < len(goods):
            raise ValueError(
                f"Some prices are missing in '{self.worksheet.title}'")

        # Equal counts mean any apparent "second price" of a good is really a
        # shifted price of its neighbour, so strict one-to-one pairing is used.
        # Only a surplus of prices is treated as goods genuinely carrying
        # several prices each.
        sum_extra_prices = len(prices) > len(goods)

        # snapshots of the items, consumed front to back
        good_items = list(goods.items())
        price_items = list(prices.items())
        price_pos = 0
        purchases = []

        for good_pos, good_item in enumerate(good_items):
            good_label, (good_name, good_type) = good_item

            if sum_extra_prices:
                # greedy: absorb prices until the next good's label is reached
                if good_pos + 1 < len(good_items):
                    following_good = good_items[good_pos + 1][0]
                else:
                    # last good - there is no label left to stop at
                    following_good = ""

                price_sum = 0
                while price_pos < len(price_items):
                    price_sum += price_items[price_pos][1]
                    price_pos += 1

                    if price_pos == len(price_items):
                        break
                    upcoming_price = price_items[price_pos][0]
                    earliest = get_earliest_label(
                        upcoming_price, following_good)
                    if earliest == following_good:
                        break
            else:
                price_sum = price_items[price_pos][1]
                price_pos += 1

            purchases.append(Purchase(
                good_name=good_name,
                good_type=good_type,
                good_label=good_label,
                price=price_sum,
                created=self.date,
            ))

        return purchases