Ejemplo n.º 1
0
class CrossValidator:
    """Train a model on CSV data and score the offers of a testing set.

    The training set and the testing set are built from the CSV paths found
    in ``settings``; ``ml_engine`` is trained on the former and then asked
    for a prediction for every offer of the merchant identified by
    ``settings['initial_merchant_id']`` in the latter.
    """

    def __init__(self, settings: dict, ml_engine: MlEngine):
        """Keep the configuration and create the (still empty) data holders.

        Args:
            settings: Configuration mapping. This class reads the keys
                ``merchant_token``, ``merchant_id``, ``initial_merchant_id``,
                ``market_situation_csv_path``, ``buy_offer_csv_path``,
                ``testing_set_csv_path`` and ``output_file``.
            ml_engine: Engine used through ``train_model`` and ``predict``.
        """
        self.settings = settings
        self.ml_engine = ml_engine
        self.testing_data = TestingData()
        self.training_data = TrainingData(self.settings['merchant_token'],
                                          self.settings['merchant_id'])

    def cross_validation(self):
        """Run the whole pipeline: load data, train, predict, write results."""
        pipeline = (
            ('Creating training set', self.create_training_data),
            ('Creating testing set', self.create_testing_data),
            ('Perform learning', self.perform_learning),
            ('Calculate probabilties per offer and write them to disk',
             self.calculate_sales_probality_per_offer),
        )
        for message, step in pipeline:
            logging.debug(message)
            step()
        logging.debug('Finished!')

    def create_training_data(self):
        """Fill the training data from the market-situation and buy-offer CSVs."""
        cfg = self.settings
        self.training_data.append_by_csvs(cfg['market_situation_csv_path'],
                                          cfg['buy_offer_csv_path'],
                                          cfg["initial_merchant_id"])

    def create_testing_data(self):
        """Fill the testing data from the testing-set CSV."""
        cfg = self.settings
        self.testing_data.append_by_csvs(cfg['testing_set_csv_path'],
                                         cfg['initial_merchant_id'])

    def perform_learning(self):
        """Train the engine on the converted training data."""
        converted = self.training_data.convert_training_data()
        self.ml_engine.train_model(converted)

    def calculate_sales_probality_per_offer(self):
        """Predict a value for each own offer and pass the results on.

        Every joined market situation in the testing data that contains the
        merchant ``settings['initial_merchant_id']`` contributes one
        ``(int(offer_id), prediction)`` pair per offer of that merchant. The
        collected pairs are handed to ``write_calculations_to_file`` together
        with ``settings['output_file']``.
        """
        merchant_id = self.settings["initial_merchant_id"]
        results = []

        for situations in self.testing_data.joined_data.values():
            for jms in situations.values():
                if merchant_id not in jms.merchants:
                    # This market situation holds no offer of ours.
                    continue
                own_offers = jms.merchants[merchant_id]
                for offer_id in own_offers:
                    features = extract_features(
                        offer_id,
                        TrainingData.create_offer_list(jms),
                        False,
                        self.testing_data.product_prices)
                    product_id = own_offers[offer_id].product_id
                    prediction = self.ml_engine.predict(product_id, [features])
                    # Only the first element of the prediction is recorded.
                    results.append((int(offer_id), prediction[0]))

        write_calculations_to_file(results, self.settings['output_file'])
Ejemplo n.º 2
0
class MLMerchant(SuperMerchant):
    """Merchant that prices its offers with the help of a learning engine.

    Training data is either built from CSV files (first start) or loaded
    from ``settings['data_file']`` and extended via Kafka (later starts and
    periodic re-training, which runs in a background thread). Prices are
    chosen by maximising the expected profit predicted by ``ml_engine``;
    a random price is used for exploration, while too little data is
    available, and as a fallback when prediction fails.
    """

    def __init__(self,
                 settings,
                 ml_engine: MlEngine,
                 api: ApiAbstraction = None):
        """Set up the merchant.

        Args:
            settings: Configuration mapping, forwarded to the base class.
            ml_engine: Engine used for training and prediction.
            api: Optional API abstraction, forwarded to the base class.
        """
        super().__init__(settings, api)
        # Time of the last (started or finished) training run; None before
        # the first one.
        self.last_learning = None
        self.ml_engine: MlEngine = ml_engine
        self.performance_calculator = PerformanceCalculator(
            ml_engine, self.merchant_id)
        # Stays None until initial_learning() or the background worker has
        # created/loaded the training data.
        self.training_data: TrainingData = None
        self.priceutils = PriceUtils()

    def initialize(self):
        """Prepare the model and start the logic loop.

        If ``settings['data_file']`` names an existing file, training data is
        loaded and updated in a background thread; otherwise the initial
        training from CSV files runs synchronously. Afterwards
        ``run_logic_loop`` is called.
        """
        if self.settings["data_file"] and os.path.isfile(
                self.settings["data_file"]):
            self.update_machine_learning()
        else:
            self.initial_learning()

        self.run_logic_loop()

    def update_machine_learning(self):
        """Start ``machine_learning_worker`` in a new thread and return."""
        thread = Thread(target=self.machine_learning_worker)
        thread.start()

    def machine_learning_worker(self):
        """Reload and extend the training data, re-train and evaluate."""
        self.load_and_update_training_data()
        self.perform_learning()
        self.performance_calculator.calc_performance(self.training_data,
                                                     self.merchant_id)

    def initial_learning(self):
        """Build the training data from CSV files, train and evaluate."""
        self.create_training_data()
        self.perform_learning()
        self.performance_calculator.calc_performance(self.training_data,
                                                     self.merchant_id)
        logging.debug('Setup done. Starting merchant...')

    def perform_learning(self):
        """Train the per-product and the universal model, then stamp the time."""
        self.ml_engine.train_model(self.training_data.convert_training_data())
        self.ml_engine.train_universal_model(
            self.training_data.convert_training_data(True))
        self.last_learning = datetime.datetime.now()

    def create_training_data(self):
        """Create training data from the configured CSV files and save it."""
        self.training_data = TrainingData(self.merchant_token,
                                          self.merchant_id)
        self.training_data.append_by_csvs(
            self.settings['market_situation_csv_path'],
            self.settings['buy_offer_csv_path'],
            self.settings["initial_merchant_id"])
        save_training_data(self.training_data, self.settings["data_file"])

    def load_and_update_training_data(self):
        """Load saved training data, extend it via Kafka and save it again."""
        self.training_data = load_history(self.settings["data_file"])
        self.training_data.merchant_token = self.merchant_token
        self.training_data.append_by_kafka(
            self.settings["kafka_reverse_proxy_url"])
        save_training_data(self.training_data, self.settings["data_file"])

    def execute_logic(self):
        """Run one iteration of the merchant logic.

        Triggers re-training when due, re-prices the existing own offers,
        buys products up to ``settings['max_amount_of_offers']`` and turns
        them into new or restocked offers.

        Returns:
            ``max(1.0, request_counter) / settings['max_req_per_sec']``.
        """
        self.perform_learning_if_necessary()
        self.api.reset_request_counter()

        # get and process existing offers
        offers = self.api.get_offers()
        own_offers = [
            offer for offer in offers if offer.merchant_id == self.merchant_id
        ]
        own_offers_by_uid = {offer.uid: offer for offer in own_offers}
        missing_offers = self.settings["max_amount_of_offers"] - sum(
            offer.amount for offer in own_offers)

        # buy new products
        new_products = self.buy_new_products(missing_offers)
        product_prices_by_uid = self.get_product_prices()

        # handle bought products and either add them to existing offers or
        # create new ones
        self.update_existing_offers(offers, own_offers, product_prices_by_uid)
        self.process_bought_products(new_products, offers, own_offers_by_uid,
                                     product_prices_by_uid)

        return max(1.0,
                   self.api.request_counter) / self.settings["max_req_per_sec"]

    def get_product_prices(self) -> Dict[str, float]:
        """Return a mapping from product uid to product price."""
        products = self.api.get_products()
        return {product.uid: product.price for product in products}

    def process_bought_products(self, new_products: List[Product],
                                offers: List[Offer], own_offers_by_uid: dict,
                                product_prices_by_uid: dict):
        """Handle every freshly bought product, one after another."""
        for product in new_products:
            self.process_bought_product(offers, own_offers_by_uid, product,
                                        product_prices_by_uid)

    def process_bought_product(self, offers, own_offers_by_uid, product,
                               product_prices_by_uid):
        """Restock the matching own offer or create a new offer.

        Any exception is reported on stdout and swallowed, so that one
        failing product does not abort the handling of the others.
        """
        try:
            if product.uid in own_offers_by_uid:
                self.update_existing_offer(offers, own_offers_by_uid, product,
                                           product_prices_by_uid)
            else:
                self.create_new_offer(offers, product, product_prices_by_uid)
        except Exception as e:
            print('could not handle product:', product, e)

    def create_new_offer(self, offers: List[Offer], product: Product,
                         product_prices_by_uid: dict):
        """Create, price and publish a new offer for ``product``."""
        offer = Offer.from_product(product)
        offer.prime = True
        offer.shipping_time['standard'] = self.settings["shipping"]
        offer.shipping_time['prime'] = self.settings["primeShipping"]
        offer.merchant_id = self.merchant_id
        # The new offer is not part of ``offers`` yet, so it is appended to
        # the list handed to the price calculation.
        offer.price = self.calculate_optimal_price(product_prices_by_uid,
                                                   offer,
                                                   product.uid,
                                                   current_offers=offers +
                                                   [offer])
        self.api.add_offer(offer)

    def update_existing_offer(self, offers: List[Offer],
                              own_offers_by_uid: dict, product: Product,
                              product_prices_by_uid: dict):
        """Restock the own offer for ``product`` and re-price it."""
        offer = own_offers_by_uid[product.uid]
        offer.amount += product.amount
        offer.signature = product.signature
        self.api.restock(offer.offer_id,
                         amount=product.amount,
                         signature=product.signature)
        offer.price = self.calculate_optimal_price(product_prices_by_uid,
                                                   offer,
                                                   product.uid,
                                                   current_offers=offers)
        self.api.update_offer(offer)

    def update_existing_offers(self, offers: List[Offer],
                               own_offers: List[Offer],
                               product_prices_by_uid: dict):
        """Re-price all own offers that still have stock.

        An offer is only sent to the API when its price actually changed.
        """
        for own_offer in own_offers:
            if own_offer.amount <= 0:
                continue
            old_price = own_offer.price
            own_offer.price = self.calculate_optimal_price(
                product_prices_by_uid,
                own_offer,
                own_offer.uid,
                current_offers=offers)
            if float(own_offer.price) != float(old_price):
                self.api.update_offer(own_offer)

    def buy_new_products(self, missing_offers: int):
        """Buy ``missing_offers`` products and return them as a list.

        A non-positive ``missing_offers`` buys nothing.
        """
        return [self.api.buy_product() for _ in range(missing_offers)]

    def perform_learning_if_necessary(self):
        """Start a background re-training when the learning interval elapsed.

        Training is started when no training happened yet or when
        ``settings['learning_interval']`` minutes have passed since
        ``last_learning``. ``last_learning`` is updated right away so that
        the next iterations do not start further workers while this one is
        still running.
        """
        now = datetime.datetime.now()
        if self.last_learning:
            next_training_session = self.last_learning + datetime.timedelta(
                minutes=self.settings["learning_interval"])
            if next_training_session > now:
                return
        self.last_learning = now
        self.update_machine_learning()

    def calculate_optimal_price(self,
                                product_prices_by_uid: dict,
                                own_offer: Offer,
                                uid,
                                current_offers: List[Offer] = None):
        """Choose a price for ``own_offer``.

        A random price is returned in roughly 1% of the calls, while the
        training data has not been loaded yet, and while it contains fewer
        market situations than ``settings['min_marketsituations']``.
        Otherwise the price comes from ``highest_profit_from_ml``.

        Raises:
            KeyError: If ``uid`` is missing from ``product_prices_by_uid``.
        """
        price = product_prices_by_uid[uid]
        explore = random.uniform(0, 1) < 0.01
        # training_data is None until the background worker started by
        # initialize() has loaded it; treat that like "not enough data".
        if (explore or self.training_data is None
                or self.training_data.number_marketsituations <
                self.settings["min_marketsituations"]):
            print('r', end='')
            sys.stdout.flush()
            return self.priceutils.random_price(price)
        return self.highest_profit_from_ml(current_offers, own_offer, price)

    def highest_profit_from_ml(self, current_offers: List[Offer],
                               own_offer: Offer, price: float):
        """Return the potential price with the highest expected profit.

        The product-specific model is used when the engine has one for the
        offer's product, the universal model otherwise. If prediction fails
        with ``KeyError``, ``ValueError`` or ``AttributeError`` a random
        price is returned instead.
        """
        # NOTE(review): the returned value is the raw entry of
        # ``potential_prices``, whereas predictions are made for
        # ``price + entry / 10.0`` -- confirm the intended unit with
        # PriceUtils.
        try:
            potential_prices = self.priceutils.get_potential_prices(
                price, False)
            if str(own_offer.product_id) in self.ml_engine.product_model_dict:
                probas = self.__highest_profit_from_product_model(
                    current_offers, own_offer, potential_prices, price)
            else:
                probas = self.__highest_profit_from_universal_model(
                    current_offers, own_offer, potential_prices, price)
            expected_profits = self.priceutils.calculate_expected_profits(
                potential_prices, price, probas)
            best_price = potential_prices[expected_profits.index(
                max(expected_profits))]
            return best_price
        except (KeyError, ValueError, AttributeError) as e:
            # Fallback for new products
            print('R', end='')
            print(e)
            sys.stdout.flush()
            return self.priceutils.random_price(price)

    def __highest_profit_from_universal_model(self, current_offers, own_offer,
                                              potential_prices, price):
        """Predict with the universal model for every potential price."""
        lst = self.__create_prediction_data(own_offer, current_offers,
                                            potential_prices, price, True)
        probas = self.ml_engine.predict_with_universal_model(lst)
        print('U', end='')
        sys.stdout.flush()
        return probas

    def __highest_profit_from_product_model(self, current_offers, own_offer,
                                            potential_prices, price):
        """Predict with the product-specific model for every potential price."""
        lst = self.__create_prediction_data(own_offer, current_offers,
                                            potential_prices, price, False)
        probas = self.ml_engine.predict(str(own_offer.product_id), lst)
        print('.', end='')
        sys.stdout.flush()
        return probas

    def __create_prediction_data(self, own_offer: Offer,
                                 current_offers: List[Offer],
                                 potential_prices: List[int], price: float,
                                 universal_features: bool):
        """Build one feature vector per potential price.

        For each entry of ``potential_prices`` the offer in
        ``current_offers`` with the offer id of ``own_offer`` gets the price
        ``price + entry / 10.0`` assigned before the features are extracted.
        The offer is modified in place and keeps the last candidate price.

        Raises:
            StopIteration: If ``current_offers`` contains no offer with the
                offer id of ``own_offer``.
        """
        lst = []
        for price_offset in potential_prices:
            candidate_price = price + price_offset / 10.0
            matching_offer = next(offer for offer in current_offers
                                  if offer.offer_id == own_offer.offer_id)
            matching_offer.price = candidate_price
            prediction_data = extract_features(
                own_offer.offer_id, current_offers, universal_features,
                self.training_data.product_prices)
            lst.append(prediction_data)
        return lst