def run_process(port, listen_backlog, rabbit_host, clients,
                data_path="data"):
    # fail fast if the output file is not accessible/writable
    open(PATH_TO_SAVE_BUSINESSES % data_path, 'wb').close()
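    # TCP service that will serve the gathered businesses file to the joiners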
    socket_downloader = SocketDataDownloader(port, listen_backlog, clients, data_path)
    while True:
        if not os.path.exists(BUSINESSES_READY % data_path):
            logger.info("Consuming businesses")
            data_gatherer = DataGatherer(data_path, clients)
            cp = RabbitQueueConsumerProducer(rabbit_host, BUSINESSES_QUEUE,
                                             [BUSINESS_NOTIFY_END],
                                             DummyStateCommiter(data_gatherer.gather_business_locations),
                                             messages_to_group=1, logger=logger)
            try:
                cp()
            except Exception:
                logger.exception("Error while consuming businesses")
                raise

        try:
            logger.info("Starting download service")
            socket_downloader.start_download_listening()
        except Exception:
            logger.exception("Error accepting connections for downloading")
            raise
        logger.info("Stoping downloader service")
        os.remove(BUSINESSES_READY % data_path)
        if os.path.exists(PATH_TO_SAVE_CLIENTS_ENDED % data_path):
            os.remove(PATH_TO_SAVE_CLIENTS_ENDED % data_path)
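
# A minimal entrypoint sketch, assuming env-var based configuration (the
# variable names and defaults below are illustrative, not from this repo):
if __name__ == "__main__":
    run_process(port=int(os.environ.get("DOWNLOADER_PORT", "5000")),
                listen_backlog=int(os.environ.get("LISTEN_BACKLOG", "5")),
                rabbit_host=os.environ.get("RABBIT_HOST", "rabbitmq"),
                clients=int(os.environ.get("CLIENTS", "1")))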
 def test_simple_filter(self):
     self.test_process = Process(target=self._start_process, args=(DummyStateCommiter(self.consume_filter),))
     self.test_process.start()
     self.channel.queue_declare(queue=CONSUME_QUEUE)
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"type": "A", "value": 4.2}))
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"type": "B", "value": 5}))
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"type": "C", "value": "a"}))
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"type": "D", "value": 4}))
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"type": "A", "value": 2.2}))
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"type": "A", "value": 4.1}))
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps([{"type": "A", "value": None},
                                                 {"type": "V", "value": None}]))
     processed_data = []
     for _ in range(4):
         processed_data.append(json.loads(self.recv_pipe.recv()))
     self.assertFalse(self.recv_pipe.poll(1))
     self.assertEqual(processed_data[0], {"type": "A", "value": 4.2})
     self.assertEqual(processed_data[1], {"type": "A", "value": 2.2})
     self.assertEqual(processed_data[2], {"type": "A", "value": 4.1})
     self.assertEqual(processed_data[3], {"type": "A", "value": None})
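
 # consume_filter (defined in setUp, not shown here) passes through only
 # messages of type "A", so exactly four filtered messages arrive on
 # recv_pipe. A hypothetical sketch consistent with the assertions above,
 # assuming the committer callback relays results over the pipe's send end:
 #
 #     def consume_filter(self, message):
 #         if message["type"] == "A":
 #             self.send_pipe.send(json.dumps(message))
 #         return [], False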
 def test_idempotency_set_integration(self):
     self.test_process = Process(target=self._start_process,
                                 args=(DummyStateCommiter(lambda m: self.publish_multiple(m, self.message_set)), 2))
     self.test_process.start()
     self.channel.queue_declare(queue=CONSUME_QUEUE)
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"type": "A", "value": 4.2}))
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"type": "B", "value": 7}))
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"type": "C", "value": "a"}))
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"type": "D", "value": 1}))
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps([{"type": "A", "value": 4.2},
                                                 {"type": "V", "value": 1}]))
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"type": "D", "value": 1}))
     processed_data = []
     for _ in range(8):
         processed_data.append(json.loads(self.recv_pipe.recv()))
     self.assertFalse(self.recv_pipe.poll(1))
     self.assertEqual(processed_data[0], [{"type": "A"}, {"type": "A"}])
     self.assertEqual(processed_data[1], [{"type": "A"}, {"type": "A"}])
     self.assertEqual(processed_data[2], [{"type": "B"}, {"type": "B"}])
     self.assertEqual(processed_data[3], [{"type": "B"}, {"type": "B"}])
     self.assertEqual(processed_data[4], [{"type": "B"}, {"type": "B"}])
     self.assertEqual(processed_data[5], [{"type": "B"}])
     self.assertEqual(processed_data[6], [{"type": "D"}])
     self.assertEqual(processed_data[7], [{"type": "V"}])
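
 # The duplicated payloads ({"type": "A", "value": 4.2} inside the list and
 # the second {"type": "D", "value": 1}) are already in self.message_set, so
 # they produce no additional output: each distinct message is processed
 # exactly once.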
 def test_simple_multiply_message_with_grouping(self):
     self.test_process = Process(target=self._start_process, args=(DummyStateCommiter(self.publish_multiple), 2))
     self.test_process.start()
     self.channel.queue_declare(queue=CONSUME_QUEUE)
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"type": "A", "value": 4.2}))
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"type": "B", "value": 7}))
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"type": "C", "value": "a"}))
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"type": "D", "value": 1}))
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps([{"type": "A", "value": None},
                                                 {"type": "V", "value": None}]))
     processed_data = []
     for _ in range(7):
         processed_data.append(json.loads(self.recv_pipe.recv()))
     self.assertFalse(self.recv_pipe.poll(1))
     self.assertEqual(processed_data[0], [{"type": "A"}, {"type": "A"}])
     self.assertEqual(processed_data[1], [{"type": "A"}, {"type": "A"}])
     self.assertEqual(processed_data[2], [{"type": "B"}, {"type": "B"}])
     self.assertEqual(processed_data[3], [{"type": "B"}, {"type": "B"}])
     self.assertEqual(processed_data[4], [{"type": "B"}, {"type": "B"}])
     self.assertEqual(processed_data[5], [{"type": "B"}])
     self.assertEqual(processed_data[6], [{"type": "D"}])
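
 # publish_multiple (defined on the test case, not shown) evidently emits
 # int(value) copies of {"type": t} for numeric values and nothing otherwise;
 # messages_to_group=2 then batches them in pairs (4.2 -> AA, AA; 7 -> BB, BB,
 # BB, B; 1 -> D). A hypothetical sketch:
 #
 #     def publish_multiple(self, message):
 #         try:
 #             copies = int(message["value"])
 #         except (TypeError, ValueError):
 #             return [], False
 #         return [{"type": message["type"]}] * copies, False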
 def test_simple_stop(self):
     self.test_process = Process(target=self._start_process,
                                 args=(DummyStateCommiter(
                                     lambda m: self.republish_and_stop_with_key_z(m, self.message_set)), 2))
     self.test_process.start()
     self.channel.queue_declare(queue=CONSUME_QUEUE)
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"key": "A", "value": 4.2}))
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"key": "Z", "value": 7}))
     self.channel.basic_publish(exchange='', routing_key=CONSUME_QUEUE,
                                body=json.dumps({"key": "C", "value": "a"}))
     self.test_process.join()
     processed_data = []
     for _ in range(2):
         processed_data.append(json.loads(self.recv_pipe.recv()))
     self.assertFalse(self.recv_pipe.poll(1))
     self.assertEqual(processed_data[0], [{"key": "A", "value": 4.2}])
     self.assertEqual(processed_data[1], [{"key": "Z", "value": 7}])
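
 # republish_and_stop_with_key_z re-emits each message and returns the stop
 # flag on key "Z", so the consumer process exits (join() returns) and the
 # trailing "C" message is never processed.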
def run_process(downloader_host,
                downloader_port,
                join_from_queue,
                output_joined_queue,
                rabbit_host,
                signature,
                data_path="data"):
    while True:
        if not os.path.exists(BUSINESSES_READY_PATH % data_path):
            logger.info("Waiting for downloader to be ready")

            cp = RabbitQueueConsumerProducer(
                rabbit_host,
                BUSINESS_NOTIFY_END, [BUSINESS_NOTIFY_END],
                DummyStateCommiter(partial(wait_for_file_ready, data_path)),
                messages_to_group=1,
                logger=logger)
            cp()
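            # cp() blocks here consuming BUSINESS_NOTIFY_END until
            # wait_for_file_ready (presumably) sees the ready marker and
            # signals the consumer to stop.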
        logger.info("Downloader is ready")

        if not os.path.exists(BUSINESSES_DONE_PATH % data_path):
            while True:
                try:
                    logger.info("Downloading file")

                    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                    sock.connect((downloader_host, downloader_port))
                    socket_transferer = BlockingSocketTransferer(sock)
                    socket_transferer.send_plain_text("SEND FILE")
                    with open(PATH_TO_SAVE_BUSINESSES % data_path,
                              "wb") as business_file:
                        socket_transferer.receive_file_data(business_file)
                    socket_transferer.receive_plain_text()
                    socket_transferer.close()
                    Path(BUSINESSES_DONE_PATH % data_path).touch()
                    break
                except Exception:
                    logger.exception("Exception while downloading businesses")
                    sleep(1)

        logger.info("File is downloaded")
        logger.info("Starting consumer to join")
        if not os.path.exists(JOINING_DONE_PATH % data_path):
            with open(PATH_TO_SAVE_BUSINESSES % data_path,
                      "rb") as business_file:
                business_locations = pickle.load(business_file)
            joiner_state_commiter = DummyStateCommiter(
                partial(add_location_to_businesses, business_locations,
                        data_path, signature))
            cp = RabbitQueueConsumerProducer(rabbit_host,
                                             join_from_queue,
                                             [output_joined_queue],
                                             joiner_state_commiter,
                                             messages_to_group=1000,
                                             logger=logger)
            cp()
        logger.info("Ending stream")
        if not os.path.exists(ENDED_PATH % data_path):
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.connect((downloader_host, downloader_port))
            socket_transferer = BlockingSocketTransferer(sock)
            socket_transferer.send_plain_text("END_%s" % signature)
            assert socket_transferer.receive_plain_text() == "REGISTERED"
            Path(ENDED_PATH % data_path).touch()
            socket_transferer.close()
        os.remove(BUSINESSES_READY_PATH % data_path)
        os.remove(BUSINESSES_DONE_PATH % data_path)
        os.remove(JOINING_DONE_PATH % data_path)
        os.remove(ENDED_PATH % data_path)
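
# Wire protocol used above between the joiner and the downloader service:
# the joiner sends "SEND FILE" and receives the pickled business-locations
# file; when its stream ends it sends "END_<signature>" and the downloader
# replies "REGISTERED" once that client's end is recorded.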
            self.file.write("%s %d\n" % (item['day'], item['count']))
            return [], False

    def gather_funny_cities(self, item):
        if message_is_end(item):
            self.file.close()
            return [], True
        else:
            self.file.write("%s %d\n" % (item['city'], item['count']))
            return [], False


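# With no output queues, the ConsumerProducer acts as a pure consumer:
# DataGatherer.gather_users drains the queue into MORE_THAN_50_REVIEWS_PATH
# until the end-of-stream message closes the file.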
cp = RabbitQueueConsumerProducer(
    RABBIT_HOST,
    'yelp_users_50_or_more_reviews', [],
    DummyStateCommiter(DataGatherer(MORE_THAN_50_REVIEWS_PATH).gather_users),
    messages_to_group=1)
p = Process(target=cp)
p.start()
p.join()
print_w_timestamp("The results for users with 50 or more reviews are in %s" %
                  MORE_THAN_50_REVIEWS_PATH)
cp = RabbitQueueConsumerProducer(
    RABBIT_HOST,
    'yelp_users_50_or_more_reviews_and_5_stars', [],
    DummyStateCommiter(
        DataGatherer(MORE_THAN_50_REVIEWS_5_STARS_PATH).gather_users),
    messages_to_group=1)
p = Process(target=cp)
p.start()
p.join()