def test_rabbitmq_consuming(self):
    # Integration Test the Consuming Worker with 50,000 messages
    # This test just uses send_task for publishing
    num_to_consume = 50000
    num_sent = 0
    num_to_send = num_to_consume
    msgs_to_send = []
    msgs_by_id = {}

    not_done_publishing = True
    test_values = {"test_name": "large messages"}

    if len(msgs_to_send) == 0:
        while len(msgs_to_send) != num_to_send:
            test_msg = self.build_user_conversion_event_msg(test_values)
            msgs_to_send.append(test_msg)
            msgs_by_id[test_msg["msg_id"]] = False
    # end of building messages before slower publishing calls

    pub_auth_url = ev("RELAY_WORKER_BROKER_URL",
                      "pyamqp://*****:*****@localhost:5672//")
    path_to_config_module = "ecomm_app.ecommerce.celeryconfig_pub_sub"

    app = ecomm_app.ecommerce.tasks.get_celery_app(
        name="demo",
        auth_url=pub_auth_url,
        path_to_config_module=path_to_config_module)

    task_name = "ecomm_app.ecommerce.tasks.handle_user_conversion_events"

    source_id = {"msg_proc": ev("TEST_RELAY_NAME", "test_ecomm_relay")}
    result = None

    log.info(("Sending broker={}")
             .format(app.conf.broker_url))

    while not_done_publishing:

        if (num_sent % 1000 == 0) and num_sent > 0:
            log.info(("Published {} for "
                      "{}/{} messages")
                     .format(get_percent_done(num_sent, num_to_send),
                             num_sent,
                             num_to_send))
        # end of if print for tracing

        msg_body = None
        if num_sent < len(msgs_to_send):
            msg_body = msgs_to_send[num_sent]

        result = app.send_task(task_name, (msg_body, source_id))

        num_sent += 1

        if num_sent >= num_to_send:
            log.info(("Published {} ALL "
                      "{}/{} messages")
                     .format(get_percent_done(num_sent, num_to_send),
                             num_sent,
                             num_to_send))
            not_done_publishing = False
        elif num_sent >= len(msgs_to_send):
            log.info(("Published {} all "
                      "{}/{} messages result={}")
                     .format(get_percent_done(num_sent, len(msgs_to_send)),
                             num_sent,
                             num_to_send,
                             result))
            not_done_publishing = False
        # if should stop

    # end of not_done_publishing

    assert num_sent == num_to_consume

    log.info("")
    os.system("list-queues.sh")
    log.info("")
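# Note: the ev() helper used above is not defined in this excerpt. A
# minimal sketch, assuming it is a thin wrapper over os.getenv that
# returns a default when the environment variable is unset (hypothetical
# implementation for illustration, not the project's actual code):
import os


def ev(key, default=None):
    # read an environment variable, falling back to the default when unset
    return os.getenv(key, default)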
find_this = "08:00:27:89:2d:02" total_records = len(df.index) ridx = 1 for idx, row in df.iterrows(): for h in df.columns.values: if str(row[h]) == find_this: log.info(("found={} row={} column_header={}").format( find_this, ridx, h)) sys.exit(0) break # end of all columns perc_done = get_percent_done(ridx, total_records) if ridx % 1000 == 0: log.info(("done {} - {}/{} rows").format(perc_done, ridx, total_records)) ridx += 1 # end of all rows perc_done = get_percent_done(ridx, total_records) log.info(("Done {} - {}/{} rows").format(perc_done, ridx, total_records)) log.info( ("Did not find={} in training_data={}").format(find_this, training_file)) sys.exit(0)
def test_rabbitmq_consuming(self):
    # Integration Test the Subscriber Processor
    # This test just fills the queue for processing
    num_to_consume = 50000
    num_sent = 0
    num_to_send = num_to_consume
    msgs_to_send = []
    msgs_by_id = {}

    self.exchange_name = ev("LOAD_TEST_EXCHANGE", "reporting")
    self.routing_key = ev("LOAD_TEST_ROUTING_KEY", "reporting.accounts")
    self.queue_name = ev("LOAD_TEST_QUEUE", "reporting.accounts")

    log.info(("Publishing {}/{} "
              "ex={} rk={} broker={}").format(num_sent,
                                              num_to_send,
                                              self.exchange_name,
                                              self.routing_key,
                                              self.pub_auth_url))

    pub_retry = True
    not_done_publishing = True
    test_values = {"test_name": "large messages"}

    if len(msgs_to_send) == 0:
        while len(msgs_to_send) != num_to_send:
            test_msg = self.build_user_conversion_event_msg(test_values)
            msgs_to_send.append(test_msg)
            msgs_by_id[test_msg["msg_id"]] = False
    # end of building messages before slower publishing calls

    while not_done_publishing:

        if (num_sent % 1000 == 0) and num_sent > 0:
            log.info(("Published {} for "
                      "{}/{} messages").format(
                          get_percent_done(num_sent, num_to_send),
                          num_sent,
                          num_to_send))
        # end of if print for tracing

        msg_body = None
        if num_sent < len(msgs_to_send):
            msg_body = msgs_to_send[num_sent]

        self.publish(body=msg_body,
                     exchange=self.exchange_name,
                     routing_key=self.routing_key,
                     queue=self.queue_name,
                     priority=0,
                     ttl=None,
                     serializer=self.pub_serializer,
                     retry=pub_retry)

        num_sent += 1

        if num_sent >= num_to_send:
            log.info(("Published {} ALL "
                      "{}/{} messages").format(
                          get_percent_done(num_sent, num_to_send),
                          num_sent,
                          num_to_send))
            not_done_publishing = False
        elif num_sent >= len(msgs_to_send):
            log.info(("Published {} all "
                      "{}/{} messages").format(
                          get_percent_done(num_sent, len(msgs_to_send)),
                          num_sent,
                          num_to_send))
            not_done_publishing = False
        # if should stop

    # end of not_done_publishing

    assert num_sent == num_to_consume

    os.system("list-queues.sh")
    log.info("")
    log.info(("display messages in the queues "
              "with routing_key={} again with:").format(self.routing_key))
    log.info("list-queues.sh")
    log.info("")
def test_consuming_large_number_of_messages(self):
    # Test the Publisher and Subscriber with 50,000 messages
    # and verify each unique message id was consumed
    # default is using the rabbitmq broker
    num_to_consume = 50000
    num_sent = 0
    num_to_send = num_to_consume
    num_consumed = 0
    msgs_to_send = []
    msgs_received = []
    msgs_by_id = {}

    self.exchange_name = "test_large_num_1"
    self.routing_key = "test_large_num_1.orders"
    self.queue_name = "test_large_num_1.orders"

    class TestMessageProcessor:

        def __init__(self,
                     should_consume=1,
                     test_id=None,
                     stop_after_num=-1,
                     validate_for_test=False):
            self.num_consumed = 0
            self.should_consume = should_consume
            self.stop_after_num = stop_after_num
            self.expected_test_id = test_id
            self.recv_msgs = []
            self.validate_for_test = validate_for_test
        # end of __init__

        def process_message(self, body, message):
            self.num_consumed += 1
            test_id = "unknown"
            if "test_id" in body:
                test_id = body["test_id"]

            # validate we're not crossing test streams
            if self.validate_for_test and test_id != self.expected_test_id:
                log.error(("Test={} consumed a message "
                           "from another test={} - please "
                           "restart rabbitmq or clean up "
                           "the queue and try again").format(
                               self.expected_test_id,
                               test_id))
                assert test_id == self.expected_test_id
            # end of validating the test_id matches the msg's test_id

            log.debug(("test={} Consumed message "
                       "{}/{} acking").format(test_id,
                                              self.num_consumed,
                                              self.should_consume))
            msgs_received.append(body)
            message.ack()
        # end of process_message

        def get_received_messages(self):
            return self.recv_msgs
        # end of get_received_messages

    # end of TestMessageProcessor

    log.info(("Publishing {}/{} "
              "broker={}").format(num_sent,
                                  num_to_send,
                                  self.pub_auth_url))

    pub_retry = True
    not_done_publishing = True
    not_done_subscribing = True
    test_values = {"test_name": "large messages"}

    if len(msgs_to_send) == 0:
        while len(msgs_to_send) != num_to_send:
            test_msg = self.build_msg(test_values)
            msgs_to_send.append(test_msg)
            msgs_by_id[test_msg["msg_id"]] = False
    # end of building messages before slower publishing calls

    while not_done_publishing:

        if (num_sent % 1000 == 0) and num_sent > 0:
            log.info(("Published {} for "
                      "{}/{} messages").format(
                          get_percent_done(num_sent, num_to_send),
                          num_sent,
                          num_to_send))
        # end of if print for tracing

        msg_body = None
        if num_sent < len(msgs_to_send):
            msg_body = msgs_to_send[num_sent]

        self.publish(body=msg_body,
                     exchange=self.exchange_name,
                     routing_key=self.routing_key,
                     queue=self.queue_name,
                     priority=0,
                     ttl=None,
                     serializer=self.pub_serializer,
                     retry=pub_retry)

        num_sent += 1

        if num_sent >= num_to_send:
            log.info(("Published {} ALL "
                      "{}/{} messages").format(
                          get_percent_done(num_sent, num_to_send),
                          num_sent,
                          num_to_send))
            not_done_publishing = False
        elif num_sent >= len(msgs_to_send):
            log.info(("Published {} all "
                      "{}/{} messages").format(
                          get_percent_done(num_sent, len(msgs_to_send)),
                          num_sent,
                          num_to_send))
            not_done_publishing = False
        # if should stop

    # end of not_done_publishing

    log.info(("Creating Consumer "
              "{}/{} messages").format(num_consumed,
                                       num_to_consume))
    test_consumer = TestMessageProcessor(should_consume=num_to_consume,
                                         test_id=self.test_id,
                                         stop_after_num=-1)

    log.info(("Starting Consume "
              "{}/{} messages").format(num_consumed,
                                       num_to_consume))

    last_num_done = 0
    while not_done_subscribing:

        if (num_consumed % 1000 == 0) and num_consumed > 0:
            log.info(("Consumed {} for "
                      "{}/{} messages").format(
                          get_percent_done(num_consumed, num_to_consume),
                          num_consumed,
                          num_to_consume))
        # end of if print for tracing
        self.consume(callback=test_consumer.process_message,
                     queue=self.queue_name,
                     exchange=None,
                     routing_key=None,
                     forever=False,
                     serializer=self.sub_serializer,
                     heartbeat=60,
                     time_to_wait=2.0)

        num_consumed = len(msgs_received)

        if num_consumed >= num_to_consume \
                and last_num_done == num_consumed:
            log.info(("Consumed {} ALL "
                      "{}/{} messages").format(
                          get_percent_done(num_consumed, num_to_consume),
                          num_consumed,
                          num_to_consume))
            not_done_subscribing = False
        else:
            # was there something in the queue already?
            if last_num_done == num_consumed:
                # if not sleep it out
                log.info(("Consumed {} "
                          "{}/{} messages").format(
                              get_percent_done(num_consumed,
                                               num_to_consume),
                              num_consumed,
                              num_to_consume))
            # end of checking if something was found or not
        # if should stop

        last_num_done = num_consumed

    # end of not_done_subscribing

    log.info(("test={} consumed={} "
              "out of queue={}").format(self.test_id,
                                        len(msgs_received),
                                        self.queue_name))

    # find test messages that were sent and validate the msg id was from
    # this test
    for consumed_msg in msgs_received:
        consumed_msg_id = consumed_msg["msg_id"]
        if consumed_msg_id in msgs_by_id:
            msgs_by_id[consumed_msg_id] = True
    # end of for all consumed messages

    num_valid_messages = 0
    for msg_id in msgs_by_id:
        # check the consumed flag for this id, not the id itself
        if not msgs_by_id[msg_id]:
            log.error(("FAILED to find "
                       "test_id={} msg_id={}").format(self.test_id,
                                                      msg_id))
            assert msgs_by_id[msg_id]
        else:
            num_valid_messages += 1
    # end of for validating all the messages were found in the queue

    assert num_valid_messages == num_to_consume
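# get_percent_done() is used throughout these tests but not defined in
# this excerpt. A minimal sketch, assuming it returns the percentage as a
# rounded number (the "={}%" log formats elsewhere suggest callers append
# the percent sign themselves); hypothetical, for illustration only:
def get_percent_done(progress, total):
    # guard against divide-by-zero before computing the percentage
    if not total:
        return 0.0
    return round(100.0 * float(progress) / float(total), 2)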
log.error(("No columns={} found in training dataset={}").format( len(df.columns.values), training_file)) sys.exit(3) log.info( ("found rows={} columns={} in dataset={}").format(len(df.index), len(df.columns.values), training_file)) ATTACK_VALUE = 1 NON_ATTACK_VALUE = 0 filter_num_attacks = (df["label_value"] == ATTACK_VALUE) filter_num_nonattacks = (df["label_value"] == NON_ATTACK_VALUE) df_attacks = df[filter_num_attacks] df_nonattacks = df[filter_num_nonattacks] num_attacks = len(df_attacks.index) num_nonattacks = len(df_nonattacks.index) total_records = len(df.index) percent_attack = get_percent_done(num_attacks, total_records) percent_nonattack = get_percent_done(num_nonattacks, total_records) log.info(("total records={} attack={} nonattack={} " "percent_attack={}% percent_nonattack={}%").format( total_records, num_attacks, num_nonattacks, percent_attack, percent_nonattack)) sys.exit(0)
def run_publisher(broker_url,
                  exchange=None,      # kombu.Exchange object
                  routing_key=None,   # string
                  msgs=[],
                  num_per_batch=-1,
                  priority="high",
                  priority_routing={},
                  serializer="json",
                  ssl_options={},
                  transport_options={},
                  send_method=None,
                  silent=True,
                  publish_silent=False,
                  log_label="pub",
                  *args,
                  **kwargs):

    verbose = not silent

    if verbose:
        log.debug("connecting")

    with Connection(broker_url,
                    ssl=ssl_options,
                    transport_options=transport_options) as conn:

        num_to_send = len(msgs)
        if num_to_send == 0:
            log.info(("no msgs={} to publish")
                     .format(num_to_send))
            return

        use_send_method = send_method
        # use the default method for sending if one is not passed in
        if not use_send_method:
            use_send_method = \
                celery_connectors.mixin_send_task_msg.mixin_send_task_msg

        if verbose:
            log.debug(("publishing ex={} rk={} "
                       "msgs={} send_method={}")
                      .format(exchange,
                              routing_key,
                              num_to_send,
                              use_send_method.__name__))

        num_sent = 0
        not_done = True
        num_fails = 0

        while not_done:

            cur_msg = msgs[num_sent]

            hide_logs = publish_silent
            if num_sent > 1 and num_sent % 200 == 0:
                hide_logs = False
                log.info(("{} send done "
                          "msg={}/{} ex={} rk={}")
                         .format(get_percent_done(num_sent,
                                                  num_to_send),
                                 num_sent,
                                 num_to_send,
                                 exchange.name,
                                 routing_key))

            send_res = use_send_method(conn=conn,
                                       data=cur_msg,
                                       exchange=exchange,
                                       routing_key=routing_key,
                                       priority=priority,
                                       priority_routing=priority_routing,
                                       serializer=serializer,
                                       silent=hide_logs,
                                       log_label=log_label)

            if send_res["status"] == SUCCESS:
                num_fails = 0
                num_sent += 1
                if num_sent >= num_to_send:
                    not_done = False
            else:
                num_fails += 1
                sleep_duration = calc_backoff_timer(num_fails)
                log.info(("publish failed - {} - exch={} rk={} "
                          "sleep={} seconds retry={}")
                         .format(send_res["error"],
                                 exchange,
                                 routing_key,
                                 sleep_duration,
                                 num_fails))
                if num_fails > 100000:
                    num_fails = 1
                time.sleep(sleep_duration)
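# Example invocation of run_publisher, based only on the signature above.
# The broker URL, exchange, routing key, and message payloads here are
# hypothetical placeholders:
from kombu import Exchange

sample_msgs = [{"msg_id": i, "data": "hello"} for i in range(10)]
run_publisher(broker_url="pyamqp://guest:guest@localhost:5672//",
              exchange=Exchange("reporting", type="topic"),
              routing_key="reporting.accounts",
              msgs=sample_msgs,
              serializer="json",
              silent=False)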