Example #1
    def __init__(self,
                 name=ev("PUBLISHER_NAME", "kombu-publisher"),
                 auth_url=ev("BROKER_URL", "redis://localhost:6379/0"),
                 ssl_options={},
                 max_general_failures=-1):  # infinite retries
        """
        Available Transports:
        https://github.com/celery/kombu#transport-comparison
        """

        self.state = "not_ready"
        self.name = name
        self.auth_url = auth_url
        self.ssl_options = ssl_options

        self.exchange = None
        self.queue = None
        self.declare_entities = []
        self.conn = None
        self.channel = None
        self.producer = None
        self.num_setup_failures = 0
        self.num_publish_failures = 0
        self.max_general_failures = max_general_failures

        self.exchange_name = ""
        self.exchange_type = "direct"
        self.queue_name = ""
        self.routing_key = ""
        self.serializer = "json"
Example #2
    def __init__(self,
                 name="message-processor",
                 sub_auth_url=ev("SUB_BROKER_URL", "redis://localhost:6379/0"),
                 sub_ssl_options={},
                 sub_serializer="application/json",
                 sub_silent=False,
                 pub_auth_url=ev("PUB_BROKER_URL", "redis://localhost:6379/0"),
                 pub_ssl_options={},
                 pub_serializer="json",
                 pub_silent=False):

        self.name = name
        self.recv_msgs = []
        self.sub_auth_url = sub_auth_url
        self.pub_auth_url = pub_auth_url
        self.sub_ssl_options = sub_ssl_options
        self.pub_ssl_options = pub_ssl_options
        self.sub_serializer = sub_serializer
        self.pub_serializer = pub_serializer
        self.pub_queue_name = None

        self.sub = None
        self.pub = None

        self.exchange = None
        self.exchange_name = ""
        self.queue = None
        self.queue_name = ""
        self.routing_key = None
        self.pub_routing_key = None
        self.pub_hook_version = 1

        self.sub_verbose = not sub_silent
        self.pub_verbose = not pub_silent
Example #3
def capture_arp_packets():
    """capture_arp_packets

    Capture ``ARP`` packets and call the ``handle_packets`` method

    Change the network interface by ``export CAP_DEVICE=eth0``

    """
    dev = ev("CAP_DEVICE", "lo")
    """
    Ignore ports for forwarding to consolidators:

    Redis VM: 6379, 16379
    RabbitMQ VM: 5672, 15672, 25672

    """

    # http://biot.com/capstats/bpf.html
    default_filter = "arp"
    custom_filter = ev("NETWORK_FILTER", default_filter)

    log.info(("starting device={} filter={}").format(dev, custom_filter))

    scapy.sniff(filter=custom_filter, prn=handle_packets)

    log.info("done")
Example #4
def example_capture():
    """example_capture

    An example capture script

    Change the network interface by ``export CAP_DEVICE=eth0``

    """

    dev = ev("CAP_DEVICE", "lo")
    """
    Ignore ports for forwarding to consolidators:

    Redis Internal VM: 6379, 16379
    RabbitMQ Internal VM: 5672, 15672, 25672
    """

    # http://biot.com/capstats/bpf.html
    custom_filter = ("(udp and portrange 10000-17001) "
                     "or (tcp and portrange 80) "
                     "or arp "
                     "or icmp")

    log.info(("starting device={} filter={}").format(dev, custom_filter))

    scapy.sniff(filter=custom_filter, prn=handle_packets)

    log.info("done")
Example #5
    def __init__(self,
                 name=ev("SUBSCRIBER_NAME", "celery-subscriber"),
                 auth_url=ev("BROKER_URL", "redis://localhost:6379/0"),
                 app=None,
                 ssl_options={},
                 transport_options={},
                 worker_log_format="%(asctime)s: %(levelname)s %(message)s",
                 **kwargs):
        """
        Available Brokers:
        http://docs.celeryproject.org/en/latest/getting-started/brokers/index.html

        Redis:
        http://docs.celeryproject.org/en/latest/getting-started/brokers/redis.html

        RabbitMQ:
        http://docs.celeryproject.org/en/latest/getting-started/brokers/rabbitmq.html

        SQS:
        http://docs.celeryproject.org/en/latest/getting-started/brokers/sqs.html
        """

        self.state = "not_ready"
        self.name = name
        self.auth_url = auth_url
        self.ssl_options = ssl_options
        self.transport_options = transport_options

        self.subscriber_app = None

        # allow passing in an initialized Celery application
        if app:
            self.subscriber_app = app
        else:
            self.subscriber_app = Celery()

        # update the celery configuration from the kwargs dictionary
        self.subscriber_app.conf.update(kwargs)

        # make sure to set the broker_url
        self.subscriber_app.conf.broker_url = self.auth_url
        self.subscriber_app.conf.worker_log_format = worker_log_format

        self.exchange = None
        self.consume_from_queues = []
Example #6
    def __init__(self,
                 name=ev("SUBSCRIBER_NAME", "kombu-subscriber"),
                 auth_url=ev("BROKER_URL", "redis://localhost:6379/0"),
                 ssl_options={},
                 max_general_failures=-1):  # infinite retries

        """
        Available Brokers:
        http://docs.celeryproject.org/en/latest/getting-started/brokers/index.html

        Redis:
        http://docs.celeryproject.org/en/latest/getting-started/brokers/redis.html

        RabbitMQ:
        http://docs.celeryproject.org/en/latest/getting-started/brokers/rabbitmq.html

        SQS:
        http://docs.celeryproject.org/en/latest/getting-started/brokers/sqs.html
        """

        self.state = "not_ready"
        self.name = name
        self.auth_url = auth_url
        self.ssl_options = ssl_options

        self.conn = None
        self.new_conn = None
        self.channel = None
        self.consumer = None
        self.process_message_callback = None
        self.drain_time = 1.0
        self.num_setup_failures = 0
        self.num_consume_failures = 0
        self.max_general_failures = max_general_failures

        self.exchange = None
        self.exchange_name = ""
        self.routing_key = ""
        self.serializer = "json"
        self.queue = None
        self.queue_name = ""
        self.consume_from_queues = []
Example #7
def build_ssl_options(ca_cert="",
                      keyfile="",
                      certfile="",
                      ssl_required="0"):

    use_ca_certs = ev("SSL_CA_CERT", ca_cert)
    use_keyfile = ev("SSL_KEYFILE", keyfile)
    use_certfile = ev("SSL_CERTFILE", certfile)
    use_ssl_required = ev("SSL_REQUIRED", ssl_required) == "1"

    ssl_options = {}
    if use_ca_certs:
        ssl_options["ca_certs"] = use_ca_certs
    if use_keyfile:
        ssl_options["keyfile"] = use_keyfile
    if use_certfile:
        ssl_options["certfile"] = use_certfile
    if use_ssl_required:
        ssl_options["cert_reqs"] = ssl.CERT_REQUIRED

    return ssl_options
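A short usage sketch for build_ssl_options, assuming the returned dictionary is meant to be passed to a kombu Connection as its ssl argument (the broker URL and the exported certificate paths below are placeholders):

from kombu import Connection

# export SSL_CA_CERT=/path/to/ca.pem
# export SSL_KEYFILE=/path/to/key.pem
# export SSL_CERTFILE=/path/to/cert.pem
# export SSL_REQUIRED=1
ssl_options = build_ssl_options()

# assumption: kombu forwards the ca_certs/keyfile/certfile/cert_reqs
# keys to the underlying transport when ssl is a dict
conn = Connection("amqp://guest:guest@localhost:5672//",
                  ssl=ssl_options if ssl_options else False)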
Example #8
    def setUp(self):
        if self.debug:
            print("setUp")

        # state trips in the custom classes
        os.environ["TEST_STOP_DONE"] = "1"

        self.last_pub_msg = None
        self.last_sub_msg = None
        self.pub = None
        self.sub = None
        self.pub_msgs = []
        self.sub_msgs = []

        self.exchange_name = ev("TEST_EXCHANGE", "test.events")
        self.routing_key = ev("TEST_ROUTING_KEY", "test.events.conversions")
        self.queue_name = ev("TEST_QUEUE", "test.events.conversions")

        self.exchange = None
        self.queue = None
        self.last_sub_callback = None
Example #9
def relay_callback(body, message):

    pub_auth_url = ev("RELAY_WORKER_BROKER_URL",
                      "pyamqp://*****:*****@localhost:5672//")
    pub_backend_url = ev("RELAY_BACKEND_URL", "redis://localhost:6379/12")
    path_to_config_module = ev("RELAY_CONFIG_MODULE",
                               "ecomm_app.ecommerce.celeryconfig_pub_sub")

    app = ecomm_app.ecommerce.tasks.get_celery_app(
        name=ev("RELAY_NAME", "ecomm-relay"),
        auth_url=pub_auth_url,
        backend_url=pub_backend_url,
        path_to_config_module=path_to_config_module)

    task_name = ev("RELAY_TASK_NAME",
                   "ecomm_app.ecommerce.tasks.handle_user_conversion_events")
    now = datetime.now().isoformat()
    body = {
        "account_id": 999,
        "subscription_id": 321,
        "stripe_id": 876,
        "created": now,
        "product_id": "JJJ",
        "version": 1,
        "org_msg": body,
        "msg_id": str(uuid.uuid4())
    }

    source_info = {"msg_proc": ev("RELAY_NAME", "ecomm_relay")}

    log.info(("Sending broker={} "
              "body={}").format(app.conf.broker_url, body))

    result = app.send_task(task_name, (body, source_info))

    if "simulate_processing_lag" in body:
        log.info(("task - {} - simulating processing"
                  "lag={} sleeping").format(task_name,
                                            body["simulate_processing_lag"]))
        time.sleep(float(body["simulate_processing_lag"]))
    # end of handling adding artificial lag for testing Celery

    log.info(("Done with msg_id={} result={}").format(body["msg_id"],
                                                      result.get()))

    # now that the message has been
    # sent to the celery ecomm worker
    # we can ack the message which
    # deletes it from the source queue
    # the message processor uses
    message.ack()
Example #10
recv_msgs = []


def handle_message(body, message):
    log.info(("callback received msg " "body={}").format(body))
    recv_msgs.append(body)
    message.ack()


# end of handle_message

# Initialize KombuSubscriber
# http://docs.celeryproject.org/en/latest/getting-started/brokers/sqs.html
# https://github.com/celery/kombu/blob/master/kombu/transport/SQS.py
aws_key = ev("SQS_AWS_ACCESS_KEY", "not_a_key")
aws_secret = ev("SQS_AWS_SECRET_KEY", "not_a_secret")

sqs_auth_url = ev("BROKER_URL", "sqs://{}:{}@".format(aws_key, aws_secret))

transport_options = {}
ssl_options = {}
sub = KombuSubscriber("kombu-sqs-subscriber", sqs_auth_url, ssl_options)
# sample: "sqs://*****:*****@"
# ^ from the doc: 'you must remember to include the "@" at the end.'

# Now consume:
seconds_to_consume = 10.0
serializer = "application/json"
queue = "test1"
exchange = "test1"
Example #11
recv_msgs = []


def handle_message(body, message):
    log.info(("callback received msg "
              "body={}")
             .format(body))
    recv_msgs.append(body)
    message.ack()
# end of handle_message


# Initialize KombuSubscriber
ssl_options = {}
sub = KombuSubscriber("kombu-mixin-subscriber",
                      ev("SUB_BROKER_URL",
                         "amqp://*****:*****@localhost:5672//"),
                      ssl_options)


# Now consume:
seconds_to_consume = 10.0
heartbeat = 60
serializer = "application/json"
exchange = ev("CONSUME_EXCHANGE", "reporting.payments")
routing_key = ev("CONSUME_ROUTING_KEY", "reporting.payments")
queue = ev("CONSUME_QUEUE", "reporting.payments")
sub.consume(callback=handle_message,
            queue=queue,
            exchange=exchange,
            routing_key=routing_key,
            serializer=serializer,
Example #12
from kombu.mixins import ConsumerProducerMixin
from celery_connectors.utils import ev
from celery_connectors.utils import build_msg
from celery_connectors.utils import get_exchange_from_msg
from celery_connectors.utils import get_routing_key_from_msg
from celery_connectors.run_publisher import run_publisher

# Credits and inspirations from these great sources:
#
# https://github.com/celery/kombu/blob/master/examples/rpc-tut6/rpc_server.py
# https://gist.github.com/oubiwann/3843016
# https://gist.github.com/eavictor/ee7856581619ac60643b57987b7ed580#file-mq_kombu_rpc_server-py
# https://github.com/Skablam/kombu-examples
# https://gist.github.com/mlavin/6671079

log = logging.getLogger(ev("APP_NAME", "relay"))


class RelayWorker(ConsumerProducerMixin):
    def __init__(self,
                 name="relay",
                 conn=None,
                 callback=None,
                 task_queues=[],
                 prefetch_count=1,
                 relay_exchange=None,
                 relay_exchange_type=None,
                 relay_routing_key=None,
                 relay_queue=None,
                 relay_broker_url=None,
                 relay_ssl_options={},
Example #13
def publish_processed_network_packets(name="not-set",
                                      task_queue=None,
                                      result_queue=None,
                                      need_response=False,
                                      shutdown_msg="SHUTDOWN"):
    """
    # Redis/RabbitMQ/SQS messaging endpoints for pub-sub
    routing_key = ev("PUBLISH_EXCHANGE",
                     "reporting.accounts")
    queue_name = ev("PUBLISH_QUEUE",
                    "reporting.accounts")
    auth_url = ev("PUB_BROKER_URL",
                  "redis://localhost:6379/0")
    serializer = "json"
    """

    # these keys need to be cycled to prevent
    # exploitation of static keys
    filter_key = ev("IGNORE_KEY", INCLUDED_IGNORE_KEY)

    forward_host = ev("FORWARD_HOST", "127.0.0.1")
    forward_port = int(ev("FORWARD_PORT", "80"))
    include_filter_key = ev("FILTER_KEY", "")
    if not include_filter_key and filter_key:
        include_filter_key = filter_key

    filter_keys = [filter_key]

    log.info(("START consumer={} "
              "forward={}:{} with "
              "key={} filters={}").format(name, forward_host, forward_port,
                                          include_filter_key, filter_key))

    forward_skt = None

    not_done = True
    while not_done:

        if not forward_skt:
            forward_skt = connect_forwarder(forward_host=forward_host,
                                            forward_port=forward_port)

        next_task = task_queue.get()
        if next_task:

            if str(next_task) == shutdown_msg:
                # Poison pill for shutting down
                log.info(("{}: DONE CALLBACK "
                          "Exiting msg={}").format(name, next_task))
                task_queue.task_done()
                break
            # end of handling shutdown case

            try:
                log.debug(("{} parsing").format(name))

                source = next_task.source
                packet = next_task.payload

                if not packet:
                    log.error(("{} invalid task found "
                               "{} missing payload").format(name, next_task))
                    break

                log.debug(("{} found msg from src={}").format(name, source))

                network_data = parse_network_data(
                    data_packet=packet,
                    include_filter_key=include_filter_key,
                    filter_keys=filter_keys)

                if network_data["status"] == VALID:
                    if network_data["data_type"] == TCP \
                        or network_data["data_type"] == UDP \
                            or network_data["data_type"] == ARP \
                            or network_data["data_type"] == ICMP:

                        log.info(
                            ("{} valid={} packet={} "
                             "data={}").format(name, network_data["id"],
                                               network_data["data_type"],
                                               network_data["target_data"]))

                        if not forward_skt:
                            forward_skt = connect_forwarder(
                                forward_host=forward_host,
                                forward_port=forward_port)

                        if forward_skt:
                            if network_data["stream"]:

                                sent = False
                                while not sent:
                                    try:
                                        log.info("sending={}".format(
                                            network_data["stream"]))
                                        send_msg(
                                            forward_skt,
                                            network_data["stream"].encode(
                                                "utf-8"))
                                        sent = True
                                    except Exception as e:
                                        sent = False
                                        time.sleep(0.5)
                                        try:
                                            forward_skt.close()
                                            forward_skt = None
                                        except Exception as w:
                                            forward_skt = None
                                        forward_skt = connect_forwarder(
                                            forward_host=forward_host,
                                            forward_port=forward_port)
                                # end of reconnecting

                                log.info("sent={}".format(
                                    network_data["stream"]))

                                if need_response:
                                    log.info("receiving")
                                    cdr_res = forward_skt.recv(1024)
                                    log.info(("cdr - res{}").format(cdr_res))
                            else:
                                log.info(("{} EMPTY stream={} "
                                          "error={} status={}").format(
                                              name, network_data["stream"],
                                              network_data["err"],
                                              network_data["status"]))
                    else:
                        log.info(("{} not_supported valid={} "
                                  "packet data_type={} status={}").format(
                                      name, network_data["id"],
                                      network_data["data_type"],
                                      network_data["status"]))
                elif network_data["status"] == FILTERED:
                    log.info(("{} filtered={} status={}").format(
                        name, network_data["filtered"],
                        network_data["status"]))
                else:
                    if network_data["status"] == INVALID:
                        log.info(("{} invalid={} packet={} "
                                  "error={} status={}").format(
                                      name, network_data["id"],
                                      network_data["data_type"],
                                      network_data["error"],
                                      network_data["status"]))
                    else:
                        log.info(("{} unknown={} packet={} "
                                  "error={} status={}").format(
                                      name, network_data["id"],
                                      network_data["data_type"],
                                      network_data["error"],
                                      network_data["status"]))
                # end of if valid or not data
            except KeyboardInterrupt as k:
                log.info(("{} stopping").format(name))
                break
            except Exception as e:
                log.error(("{} failed packaging packet to forward "
                           "with ex={}").format(name, e))
                break
            # end of try/ex during payload processing
        # end of if found a next_task

        log.info(("Consumer: {} {}").format(name, next_task))
        task_queue.task_done()

        if need_response:
            answer = "processed: {}".format(next_task())
            result_queue.put(answer)
    # end of while

    if forward_skt:
        try:
            forward_skt.close()
            log.info("CLOSED connection")
            forward_skt = None
        except Exception:
            log.info("CLOSED connection")
    # end of cleaning up forwarding socket

    log.info("{} Done".format(name))

    return
Example #14
def build_new_deep_neural_network_from_env_variables():
    """build_new_deep_neural_network_from_env_variables

    Build a new deep neural network from environment variables:

    ``CSV_FILE`` - file to process created during prepare dataset
    ``CSV_META_FILE`` - metadata header file created during prepare dataset
    ``PREDICT_FEATURE`` - column to predict
    ``TEST_SIZE`` - split data into percentage of test to training
    """

    csv_file = ev("CSV_FILE", "/tmp/cleaned_attack_scans.csv")
    meta_file = ev("CSV_META_FILE", "/tmp/cleaned_metadata.json")
    predict_feature = ev("PREDICT_FEATURE", "label_value")
    test_size = float(ev("TEST_SIZE", "0.20"))

    if not os.path.exists(csv_file):
        log.error(("missing csv_file={}").format(csv_file))
        sys.exit(1)

    res = build_training_request(csv_file=csv_file,
                                 meta_file=meta_file,
                                 predict_feature=predict_feature,
                                 test_size=test_size)

    if res["status"] != VALID:
        log.error(("Stopping for status={} "
                   "errors: {}").format(res["status"], res["err"]))
        sys.exit(1)
    else:
        log.info(("built_training_request={} "
                  "features={} ignore={}").format(res["status"],
                                                  res["features_to_process"],
                                                  res["ignore_features"]))
    # end of validating the training request

    log.info("ready for training")

    log.info("creating Keras - sequential model")

    # create the model
    model = Sequential()
    model.add(
        Dense(8,
              input_dim=len(res["features_to_process"]),
              kernel_initializer="uniform",
              activation="relu"))
    model.add(Dense(6, kernel_initializer="uniform", activation="relu"))
    model.add(Dense(1, kernel_initializer="uniform", activation="sigmoid"))

    log.info("compiling model")

    # compile the model
    model.compile(loss="binary_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])

    log.info("fitting model - please wait")

    # fit the model
    model.fit(res["X_train"],
              res["Y_train"],
              validation_data=(res["X_test"], res["Y_test"]),
              epochs=50,
              batch_size=2,
              verbose=1)

    # evaluate the model
    scores = model.evaluate(res["X_test"], res["Y_test"])

    log.info(("Accuracy: {}").format(scores[1] * 100))
Example #15
from celery_connectors.utils import ERROR
from celery_connectors.utils import ev
from celery_connectors.utils import build_sample_msgs
from celery_connectors.utils import calc_backoff_timer
from celery_connectors.build_ssl_options import build_ssl_options


# Credits and inspirations from these great sources:
#
# https://github.com/celery/kombu/blob/master/examples/rpc-tut6/rpc_server.py
# https://gist.github.com/oubiwann/3843016
# https://gist.github.com/eavictor/ee7856581619ac60643b57987b7ed580#file-mq_kombu_rpc_server-py
# https://github.com/Skablam/kombu-examples
# https://gist.github.com/mlavin/6671079

name = ev("APP_NAME", "robopubsub")
log = build_colorized_logger(
    name=name)


broker_url = ev("PUB_BROKER_URL", "pyamqp://*****:*****@localhost:5672//")
exchange_name = ev("PUBLISH_EXCHANGE", "ecomm.api")
exchange_type = ev("PUBLISH_EXCHANGE_TYPE", "topic")
routing_key = ev("PUBLISH_ROUTING_KEY", "ecomm.api.west")
queue_name = ev("PUBLISH_QUEUE", "ecomm.api.west")
prefetch_count = int(ev("PREFETCH_COUNT", "1"))
priority_routing = {"high": queue_name,
                    "low": queue_name}
use_exchange = Exchange(exchange_name, type=exchange_type)
use_routing_key = routing_key
use_queue = Queue(queue_name, exchange=use_exchange, routing_key=routing_key)
Example #16
import logging
from kombu import Connection
from celery_connectors.utils import ev
from celery_connectors.relay_json_to_celery_worker import RelayJSONtoCeleryWorker

# Credits and inspirations from these great sources:
#
# https://github.com/celery/kombu/blob/master/examples/rpc-tut6/rpc_server.py
# https://gist.github.com/oubiwann/3843016
# https://gist.github.com/eavictor/ee7856581619ac60643b57987b7ed580#file-mq_kombu_rpc_server-py
# https://github.com/Skablam/kombu-examples
# https://gist.github.com/mlavin/6671079

log = logging.getLogger(ev("APP_NAME", "jtoc"))


def run_jtoc_relay(broker_url,
                   ssl_options={},
                   transport_options={},
                   task_queues=[],
                   callback=None,
                   prefetch_count=1,
                   relay_broker_url=None,
                   relay_backend_url=None,
                   relay_exchange=None,
                   relay_routing_key=None,
                   relay_handler=None,
                   celery_app=None,
                   *args,
                   **kwargs):
Example #17
from spylunking.log.setup_logging import build_colorized_logger
from celery_connectors.utils import ev
from celery_connectors.utils import build_sample_msgs
from celery_connectors.build_ssl_options import build_ssl_options
from celery_connectors.run_publisher import run_publisher


# Credits and inspirations from these great sources:
#
# https://github.com/celery/kombu/blob/master/examples/rpc-tut6/rpc_server.py
# https://gist.github.com/oubiwann/3843016
# https://gist.github.com/eavictor/ee7856581619ac60643b57987b7ed580#file-mq_kombu_rpc_server-py
# https://github.com/Skablam/kombu-examples
# https://gist.github.com/mlavin/6671079

name = ev("APP_NAME", "robopub")
log = build_colorized_logger(
    name=name)


broker_url = ev("PUB_BROKER_URL", "pyamqp://*****:*****@localhost:5672//")
exchange_name = ev("PUBLISH_EXCHANGE", "")
exchange_type = ev("PUBLISH_EXCHANGE_TYPE", "")
routing_key = ev("PUBLISH_ROUTING_KEY", "reporting.accounts")
queue_name = ev("PUBLISH_QUEUE", "reporting.accounts")
priority_routing = {"high": queue_name}
use_exchange = Exchange(exchange_name, type=exchange_type)
use_routing_key = routing_key
use_queue = Queue(queue_name, exchange=use_exchange, routing_key=routing_key)
task_queues = [
    use_queue
Example #18
from kombu import Exchange, Queue
from spylunking.log.setup_logging import build_colorized_logger
from celery_connectors.utils import ev
from celery_connectors.build_ssl_options import build_ssl_options
from celery_connectors.run_jtoc_relay import run_jtoc_relay

# Credits and inspirations from these great sources:
#
# https://github.com/celery/kombu/blob/master/examples/rpc-tut6/rpc_server.py
# https://gist.github.com/oubiwann/3843016
# https://gist.github.com/eavictor/ee7856581619ac60643b57987b7ed580#file-mq_kombu_rpc_server-py
# https://github.com/Skablam/kombu-examples
# https://gist.github.com/mlavin/6671079

name = ev("APP_NAME", "jtoc_relay")
log = build_colorized_logger(name=name)

broker_url = ev("SUB_BROKER_URL",
                "pyamqp://*****:*****@localhost:5672//")
exchange_name = ev("CONSUME_EXCHANGE", "ecomm.api")
exchange_type = ev("CONSUME_EXCHANGE_TYPE", "topic")
routing_key = ev("CONSUME_ROUTING_KEY", "ecomm.api.west")
queue_name = ev("CONSUME_QUEUE", "ecomm.api.west")
prefetch_count = int(float(ev("PREFETCH_COUNT", "1")))
priority_routing = {"high": queue_name, "low": queue_name}
use_exchange = Exchange(exchange_name, type=exchange_type)
use_queue = Queue(queue_name, exchange=use_exchange, routing_key=routing_key)
task_queues = [use_queue]
ssl_options = build_ssl_options()
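The setup above imports run_jtoc_relay but the call itself is cut off. A minimal sketch of how the assembled pieces could be wired together, assuming the signature shown in Example #16; the relay-side broker and backend URLs reuse the environment variable names from the earlier relay example and are placeholders:

# hypothetical call - parameter names follow the signature in Example #16
run_jtoc_relay(broker_url=broker_url,
               ssl_options=ssl_options,
               task_queues=task_queues,
               prefetch_count=prefetch_count,
               relay_broker_url=ev("RELAY_WORKER_BROKER_URL",
                                   "pyamqp://guest:guest@localhost:5672//"),
               relay_backend_url=ev("RELAY_BACKEND_URL",
                                    "redis://localhost:6379/12"),
               relay_exchange=exchange_name,
               relay_routing_key=routing_key)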
Example #19
    def test_rabbitmq_consuming(self):

        # Integration Test the Subscriber Processor
        # This test just fills the queue for processing
        num_to_consume = 50000
        num_sent = 0
        num_to_send = num_to_consume
        msgs_to_send = []

        msgs_by_id = {}

        self.exchange_name = ev("LOAD_TEST_EXCHANGE", "reporting")
        self.routing_key = ev("LOAD_TEST_ROUTING_KEY", "reporting.accounts")
        self.queue_name = ev("LOAD_TEST_QUEUE", "reporting.accounts")

        log.info(("Publishing {}/{} "
                  "ex={} rk={} broker={}").format(num_sent, num_to_send,
                                                  self.exchange_name,
                                                  self.routing_key,
                                                  self.pub_auth_url))

        pub_retry = True
        not_done_publishing = True

        test_values = {"test_name": "large messages"}

        if len(msgs_to_send) == 0:
            while len(msgs_to_send) != num_to_send:

                test_msg = self.build_user_conversion_event_msg(test_values)
                msgs_to_send.append(test_msg)
                msgs_by_id[test_msg["msg_id"]] = False
        # end of building messages before slower publishing calls

        while not_done_publishing:

            if (num_sent % 1000 == 0) and num_sent > 0:
                log.info(("Published {} for "
                          "{}/{} messages").format(
                              get_percent_done(num_sent, num_to_send),
                              num_sent, num_to_send))
            # end of if print for tracing

            msg_body = None
            if num_sent < len(msgs_to_send):
                msg_body = msgs_to_send[num_sent]

            self.publish(body=msg_body,
                         exchange=self.exchange_name,
                         routing_key=self.routing_key,
                         queue=self.queue_name,
                         priority=0,
                         ttl=None,
                         serializer=self.pub_serializer,
                         retry=pub_retry)

            num_sent += 1

            if num_sent >= num_to_send:
                log.info(("Published {} ALL "
                          "{}/{} messages").format(
                              get_percent_done(num_sent, num_to_send),
                              num_sent, num_to_send))

                not_done_publishing = False
            elif num_sent >= len(msgs_to_send):
                log.info(("Published {} all "
                          "{}/{} messages").format(
                              get_percent_done(num_sent, len(msgs_to_send)),
                              num_sent, num_to_send))

                not_done_publishing = False
            # if should stop

        # end of not_done_publishing

        assert (num_sent == num_to_consume)

        os.system("list-queues.sh")

        log.info("")
        log.info(("display messages in the queues "
                  "with routing_key={} again with:").format(self.routing_key))
        log.info("list-queues.sh")
        log.info("")
Example #20
    def __init__(self):
        """__init__"""

        self.recv_msgs = []

        # save every nth number of messages
        self.save_after_num = int(
            ev("SAVE_AFTER_NUM",
               "100"))

        # shutdown after this number of messages
        self.stop_after_num = int(
            ev("STOP_AFTER_NUM",
               "-1"))

        if self.save_after_num < 0:
            self.save_after_num = 1
        if self.stop_after_num < 0:
            self.stop_after_num = None

        # shutdown if this file is found
        self.stop_for_file = ev(
                "STOP_FILE",
                "/tmp/stop-recording-csv")

        self.dataset_name = ev(
                "DS_NAME",
                "netdata")

        self.save_dir = ev(
                "DS_DIR",
                "/tmp")

        self.save_to_file = ev(
                "OUTPUT_CSV",
                "{}/{}-{}.csv".format(
                    self.save_dir,
                    self.dataset_name,
                    rnow("%Y-%m-%d-%H-%M-%S")))

        self.archive_file = ev(
                "ARCHIVE_JSON",
                "{}/packets-{}-{}.json".format(
                    self.save_dir,
                    self.dataset_name,
                    rnow("%Y-%m-%d-%H-%M-%S")))

        self.debug = bool(ev(
                "DEBUG_PACKETS",
                "0") == "1")

        self.df = None
        self.last_df = None

        self.eth_keys = {"eth_id": "id"}
        self.ip_keys = {"ip_id": "id"}
        self.ipvsix_keys = {"ipvsix_id": "id"}
        self.icmp_keys = {"icmp_id": "id"}
        self.arp_keys = {"arp_id": "id"}
        self.tcp_keys = {"tcp_id": "id"}
        self.udp_keys = {"udp_id": "id"}
        self.dns_keys = {"dns_id": "id"}
        self.raw_keys = {"raw_id": "id"}
        self.pad_keys = {"pad_id": "id"}
        self.all_keys = {}
        self.all_keys_list = []

        self.all_eth = []
        self.all_ip = []
        self.all_ipvsix = []
        self.all_icmp = []
        self.all_arp = []
        self.all_tcp = []
        self.all_udp = []
        self.all_dns = []
        self.all_raw = []
        self.all_pad = []
        self.all_flat = []
        self.all_rows = []

        # noqa https://github.com/jay-johnson/antinex-client/blob/5fbcefaaed3d979b3c0829447b61592d5910ef22/antinex_client/build_ai_client_from_env.py#L19
        self.client = build_ai_client_from_env()

        # the client uses environment variables:
        # noqa https://github.com/jay-johnson/antinex-client/blob/5fbcefaaed3d979b3c0829447b61592d5910ef22/antinex_client/consts.py#L23
        # here is an example of what to export:
        # noqa https://github.com/jay-johnson/antinex-client/blob/master/examples/example-prediction.env
        self.request_dict = {}
        if ANTINEX_PUBLISH_ENABLED:
            if os.path.exists(ANTINEX_PUBLISH_REQUEST_FILE):
                with open(ANTINEX_PUBLISH_REQUEST_FILE, "r") as f:
                    self.request_dict = json.loads(f.read())
Example #21
#!/usr/bin/env python

from spylunking.log.setup_logging import build_colorized_logger
from celery_connectors.utils import ev
from celery_connectors.message_processor import MessageProcessor

name = "msg-proc"
log = build_colorized_logger(name=name)

log.info("Start - {}".format(name))

# want to change where you're subscribing vs publishing?
sub_ssl_options = {}
sub_auth_url = ev("SUB_BROKER_URL", "redis://localhost:6379/0")
pub_ssl_options = {}
pub_auth_url = ev("PUB_BROKER_URL", "redis://localhost:6379/0")

# start the message processor
msg_proc = MessageProcessor(name=name,
                            sub_auth_url=sub_auth_url,
                            sub_ssl_options=sub_ssl_options,
                            pub_auth_url=pub_auth_url,
                            pub_ssl_options=pub_ssl_options)

# configure where this is consuming:
queue = ev("CONSUME_QUEUE", "user.events.conversions")

# Relay Publish Hook - is disabled for this example
# where is it sending handled messages using a publish-hook or auto-caching:
exchange = None
routing_key = None
Example #22
    def handle_relay(self,
                     body={},
                     message={},
                     relay_exchange=None,
                     relay_routing_key=None,
                     serializer="json",
                     src_exchange=None,
                     src_routing_key=None):
        """
        Allow derived classes to customize
        how they 'handle_relay'
        """

        task_result = None
        last_step = "validating"

        if not relay_exchange and not relay_routing_key:
            log.error(("Relay is misconfigured: please set either a "
                       "relay_exchange={} or a relay_routing_key={}").format(
                           relay_exchange, relay_routing_key))
            return False

        try:

            last_step = "setting up base relay payload"
            base_relay_payload = {"org_msg": body, "relay_name": self.name}

            if self.verbose:
                log.debug("build relay_payload")

            last_step = "building relay payload"
            relay_payload = build_msg(base_relay_payload)

            if self.verbose:
                log.info(("relay ex={} rk={} id={}").format(
                    relay_exchange, relay_routing_key,
                    relay_payload["msg_id"]))

            last_step = "setting up task"

            task_name = ev(
                "RELAY_TASK_NAME",
                "ecomm_app.ecommerce.tasks." + "handle_user_conversion_events")
            if "task_name" in body:
                task_name = body["task_name"]

            now = datetime.datetime.now().isoformat()

            use_msg_id = ""
            if "msg_id" in body:
                use_msg_id = body["msg_id"]
            else:
                use_msg_id = relay_payload["msg_id"]

            source_info = {
                "relay": self.name,
                "src_exchange": src_exchange,
                "src_routing_key": src_routing_key
            }

            publish_body = {
                "account_id": 999,
                "subscription_id": 321,
                "stripe_id": 876,
                "created": now,
                "product_id": "JJJ",
                "version": 1,
                "r_id": relay_payload["msg_id"],
                "msg_id": use_msg_id
            }

            if self.verbose:
                log.info(("relay msg_id={} body={} "
                          "broker={} backend={}").format(
                              use_msg_id, publish_body, self.relay_broker_url,
                              self.relay_transport_options))
            else:
                log.info(("relay msg_id={} body={}").format(
                    use_msg_id,
                    str(publish_body)[0:30]))

            last_step = "send start - app"

            # http://docs.celeryproject.org/en/latest/reference/celery.html#celery.Celery
            app = Celery(
                broker=self.relay_broker_url,
                backend=self.relay_backend_url,
                transport_options=self.relay_transport_options,
                task_ignore_result=True)  # needed for cleaning up task results

            # these are targeted at optimizing processing on long-running tasks
            # while increasing reliability

            # http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-worker_prefetch_multiplier
            app.conf.worker_prefetch_multiplier = 1
            # http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-broker_heartbeat
            app.conf.broker_heartbeat = 240  # seconds
            # http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-broker_connection_max_retries
            app.conf.broker_connection_max_retries = None
            # http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-task_acks_late
            app.conf.task_acks_late = True

            # http://docs.celeryproject.org/en/latest/userguide/calling.html#calling-retry
            task_publish_retry_policy = {
                "interval_max": 1,
                "max_retries": 120,  # None - forever
                "interval_start": 0.1,
                "interval_step": 0.2
            }
            app.conf.task_publish_retry_policy = task_publish_retry_policy

            last_step = "send start - task={}".format(task_name)
            with app.producer_or_acquire(producer=None) as producer:
                """
                http://docs.celeryproject.org/en/latest/reference/celery.app.task.html#celery.app.task.Task.apply_async
                retry (bool) – If enabled sending of the task message will be
                            retried in the event of connection loss or failure.
                            Default is taken from the task_publish_retry setting.
                            Note that you need to handle the producer/connection
                            manually for this to work.

                With a redis backend connection on
                restore of a broker the first time it appears to
                hang here indefinitely:

                task_result.get()

                Please avoid getting the relay task results
                until this is fixed
                """

                task_result = app.send_task(task_name,
                                            (publish_body, source_info),
                                            retry=True,
                                            producer=producer,
                                            expires=300)
            # end of app producer block

            last_step = "send done - task={}".format(task_name)
            if task_result:
                log.info(("relay done with msg_id={}").format(body["msg_id"]))

            if "relay_simulate_processing_lag" in body["data"]:
                relay_sleep_duration = \
                    body["data"]["relay_simulate_processing_lag"]
                log.info(("task - {} - simulating processing lag "
                          "sleep={} seconds").format(task_name,
                                                     relay_sleep_duration))
                time.sleep(float(relay_sleep_duration))
            # end of handling adding artificial lag for testing Celery

            if self.verbose:
                if "msg_id" in body:
                    log.info(
                        ("relay done - "
                         "msg_id={} r_id={}").format(use_msg_id,
                                                     relay_payload["msg_id"]))
                else:
                    log.info(("relay done - "
                              "msg_id={} r_id={}"
                              "body={}").format(use_msg_id,
                                                relay_payload["msg_id"],
                                                str(body)[0:30]))

            # end of logging

        except Exception as e:
            log.error(("Task Relay failed: with ex={} when sending "
                       "to relay_exchange={} relay_routing_key={} "
                       "last_step={}").format(e, relay_exchange,
                                              relay_routing_key, last_step))
            return False
        # end of try/ex

        return True
Example #23
conn_attrs = {
    "task_default_queue": "celery.redis.sub",
    "task_default_exchange": "celery.redis.sub",
    # http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-worker_prefetch_multiplier
    "worker_prefetch_multiplier": 1,  # consume 1 message at a time
    # http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-worker_prefetch_multiplier
    "prefetch_count": 3,  # consume 1 message at a time per worker (3 workers)
    # http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-broker_heartbeat
    "broker_heartbeat": 240,  # in seconds
    # http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-broker_connection_max_retries
    "broker_connection_max_retries": None,  # None is forever
    # http://docs.celeryproject.org/en/latest/userguide/configuration.html#std:setting-task_acks_late
    "task_acks_late": True,  # on consume do not send an immediate ack back
    "task_publish_retry_policy": task_publish_retry_policy
}

app = Celery()
sub = Subscriber("redis-subscriber",
                 ev("SUB_BROKER_URL", "redis://localhost:6379/0"), app,
                 ssl_options, **conn_attrs)

# Now consume:
queue = ev("CONSUME_QUEUE", "reporting.accounts")
sub.consume(callback=handle_message,
            queue=queue,
            exchange=None,
            routing_key=None,
            prefetch_count=conn_attrs["prefetch_count"])

log.info("End - {}".format(name))
Example #24
def prepare_new_dataset():
    """prepare_new_dataset"""
    clean_dir = ev(
        "OUTPUT_DIR",
        "/tmp")
    clean_file = ev(
        "CLEANED_FILE",
        "{}/cleaned_attack_scans.csv".format(
            clean_dir))
    fulldata_file = ev(
        "FULLDATA_FILE",
        "{}/fulldata_attack_scans.csv".format(
            clean_dir))
    dataset_dir = ev(
        "DS_DIR",
        "/opt/antinex/datasets")
    csv_glob_path = ev(
        "DS_GLOB_PATH",
        "{}/*/*.csv".format(
            dataset_dir))

    pipeline_files = find_all_pipeline_csvs(
        csv_glob_path=csv_glob_path)

    post_proc_rules = {
        "drop_columns": [
            "src_file",
            "raw_id",
            "raw_load",
            "raw_hex_load",
            "raw_hex_field_load",
            "pad_load",
            "eth_dst",  # need to make this an int
            "eth_src",  # need to make this an int
            "ip_dst",   # need to make this an int
            "ip_src"    # need to make this an int
        ],
        "predict_feature": "label_name"
    }

    label_rules = {
        "set_if_above": 85,
        "labels": ["not_attack", "attack"],
        "label_values": [0, 1]
    }

    log.info("building csv")

    save_node = build_csv(
        pipeline_files=pipeline_files,
        fulldata_file=fulldata_file,
        clean_file=clean_file,
        post_proc_rules=post_proc_rules,
        label_rules=label_rules)

    if save_node["status"] == VALID:
        log.info("Successfully process datasets:")

        if ev("SHOW_SUMMARY", "1") == "1":
            log.info(("Full csv: {}")
                     .format(save_node["fulldata_file"]))
            log.info(("Full meta: {}")
                     .format(save_node["fulldata_metadata_file"]))
            log.info(("Clean csv: {}")
                     .format(save_node["clean_file"]))
            log.info(("Clean meta: {}")
                     .format(save_node["clean_metadata_file"]))
            log.info("------------------------------------------")
            log.info(("Predicting Feature: {}")
                     .format(save_node["feature_to_predict"]))
            log.info(("Features to Process: {}")
                     .format(ppj(save_node["features_to_process"])))
            log.info(("Ignored Features: {}")
                     .format(ppj(save_node["ignore_features"])))
            log.info("------------------------------------------")
        # end of show summary

        log.info("")
        log.info("done saving csv:")
        log.info("Full: {}".format(
            save_node["fulldata_file"]))
        log.info("Cleaned (no-NaNs in columns): {}".format(
            save_node["clean_file"]))
        log.info("")
    else:
        log.info("Failed to process datasets")
Example #25
def run_main(need_response=False, callback=None):
    """run_main

    start the packet consumers and the packet processors

    :param need_response: should send response back to publisher
    :param callback: handler method
    """

    stop_file = ev("STOP_FILE", "/opt/stop_recording")

    num_workers = int(ev("NUM_WORKERS", "1"))
    shutdown_msg = "SHUTDOWN"

    log.info("Start - {}".format(name))

    log.info("Creating multiprocessing queue")
    tasks = multiprocessing.JoinableQueue()
    queue_to_consume = multiprocessing.Queue()
    host = "localhost"

    # Start consumers
    log.info("Starting Consumers to process queued tasks")
    consumers = start_consumers_for_queue(
        num_workers=num_workers,
        tasks=tasks,
        queue_to_consume=queue_to_consume,
        shutdown_msg=shutdown_msg,
        consumer_class=WorkerToProcessPackets,
        callback=callback)

    log.info("creating socket")
    skt = create_layer_2_socket()
    log.info("socket created")

    not_done = True
    while not_done:

        if not skt:
            log.info("Failed to create layer 2 socket")
            log.info("Please make sure to run as root")
            not_done = False
            break

        try:
            if os.path.exists(stop_file):
                log.info(("Detected stop_file={}").format(stop_file))
                not_done = False
                break
            # stop if the file exists

            # Only works on linux
            packet = skt.recvfrom(65565)

            if os.path.exists(stop_file):
                log.info(("Detected stop_file={}").format(stop_file))
                not_done = False
                break
            # stop if the file was created during a wait loop

            tasks.put(NetworkPacketTask(source=host, payload=packet))

        except KeyboardInterrupt as k:
            log.info("Stopping")
            not_done = False
            break
        except Exception as e:
            log.error(("Failed reading socket with ex={}").format(e))
            not_done = False
            break
        # end of try/ex during socket receving

    # end of while processing network packets

    log.info(("Shutting down consumers={}").format(len(consumers)))

    shutdown_consumers(num_workers=num_workers, tasks=tasks)

    # Wait for all of the tasks to finish
    if need_response:
        log.info("Waiting for tasks to finish")
        tasks.join()

    log.info("Done waiting for tasks to finish")
Example #26
def build_csv(
        pipeline_files=[],
        fulldata_file=None,
        clean_file=None,
        post_proc_rules=None,
        label_rules=None,
        metadata_filename="metadata.json"):
    """build_csv

    :param pipeline_files: files to process
    :param fulldata_file: output all columns to this csv file
    :param clean_file: output all numeric-ready columns to this csv file
    :param post_proc_rules: rules after building the DataFrame
    :param label_rules: labeling rules
    :param metadata_filename: metadata
    """

    save_node = {
        "status": INVALID,
        "pipeline_files": pipeline_files,
        "post_proc_rules": post_proc_rules,
        "label_rules": label_rules,
        "fulldata_file": fulldata_file,
        "fulldata_metadata_file": None,
        "clean_file": clean_file,
        "clean_metadata_file": None,
        "features_to_process": [],
        "feature_to_predict": None,
        "ignore_features": [],
        "df_json": {}
    }

    if not fulldata_file:
        log.error("missing fulldata_file - stopping")
        save_node["status"] = INVALID
        return save_node
    if not clean_file:
        log.error("missing clean_file - stopping")
        save_node["status"] = INVALID
        return save_node

    log.info("build_csv - START")

    common_headers, \
        headers_dict = find_all_headers(
                            pipeline_files=pipeline_files)

    log.info(("num common_headers={} headers={}")
             .format(len(common_headers),
                     common_headers))

    # since the headers can be different we rebuild a new one:

    hdrs = {}
    for h in common_headers:
        hdrs[h] = None

    features_to_process = []
    feature_to_predict = None
    ignore_features = []

    set_if_above = None
    labels = []
    label_values = []
    if label_rules:
        set_if_above = label_rules["set_if_above"]
        labels = label_rules["labels"]
        label_values = label_rules["label_values"]

    all_rows = []
    num_done = 0
    total_files = len(pipeline_files)
    for c in pipeline_files:
        log.info(("merging={}/{} csv={}")
                 .format(num_done,
                         total_files,
                         c))
        cf = pd.read_csv(c)
        log.info((" processing rows={}")
                 .format(len(cf.index)))
        for index, row in cf.iterrows():
            valid_row = True
            new_row = copy.deepcopy(hdrs)
            new_row["src_file"] = c
            for k in hdrs:
                if k in row:
                    new_row[k] = row[k]
            # end of for all headers to copy in

            if label_rules:
                test_rand = random.randint(0, 100)
                if test_rand > set_if_above:
                    new_row["label_value"] = label_values[1]
                    new_row["label_name"] = labels[1]
                else:
                    new_row["label_value"] = label_values[0]
                    new_row["label_name"] = labels[0]
            # end of applying label rules

            if valid_row:
                all_rows.append(new_row)
        # end of for all rows in this file

        num_done += 1
    # end of building all files into one list

    log.info(("fulldata rows={} generating df")
             .format(len(all_rows)))
    df = pd.DataFrame(all_rows)
    log.info(("df rows={} headers={}")
             .format(len(df.index),
                     df.columns.values))

    if ev("CONVERT_DF",
          "0") == "1":
        log.info("converting df to json")
        save_node["df_json"] = df.to_json()

    if clean_file:
        log.info(("writing fulldata_file={}")
                 .format(fulldata_file))
        df.to_csv(fulldata_file,
                  sep=',',
                  encoding='utf-8',
                  index=False)
        log.info(("done writing fulldata_file={}")
                 .format(fulldata_file))

        if post_proc_rules:

            clean_metadata_file = ""

            feature_to_predict = "label_name"
            features_to_process = []
            ignore_features = []
            if label_rules:
                ignore_features = [feature_to_predict]

            if "drop_columns" in post_proc_rules:
                for p in post_proc_rules["drop_columns"]:
                    if p in headers_dict:
                        ignore_features.append(p)
                # post proce filter more features out
                # for non-int/float types

                for d in df.columns.values:
                    add_this_one = True
                    for i in ignore_features:
                        if d == i:
                            add_this_one = False
                            break
                    if add_this_one:
                        features_to_process.append(d)
                # for all df columns we're not ignoring...
                # add them as features to process

                fulldata_metadata_file = "{}/fulldata_{}".format(
                    "/".join(fulldata_file.split("/")[:-1]),
                    metadata_filename)
                log.info(("writing fulldata metadata file={}")
                         .format(fulldata_metadata_file))
                header_data = {"headers": list(df.columns.values),
                               "output_type": "fulldata",
                               "pipeline_files": pipeline_files,
                               "post_proc_rules": post_proc_rules,
                               "label_rules": label_rules,
                               "features_to_process": features_to_process,
                               "feature_to_predict": feature_to_predict,
                               "ignore_features": ignore_features,
                               "created": rnow()}

                with open(fulldata_metadata_file, "w") as otfile:
                    otfile.write(str(ppj(header_data)))

                keep_these = features_to_process
                keep_these.append(feature_to_predict)

                log.info(("creating new clean_file={} "
                          "keep_these={} "
                          "predict={}")
                         .format(clean_file,
                                 keep_these,
                                 feature_to_predict))

                # need to remove all columns that are all nan
                clean_df = df[keep_these].dropna(
                                axis=1, how='all').dropna()

                cleaned_features = clean_df.columns.values
                cleaned_to_process = []
                cleaned_ignore_features = []
                for c in cleaned_features:
                    if c == feature_to_predict:
                        cleaned_ignore_features.append(c)
                    else:
                        keep_it = True
                        for ign in ignore_features:
                            if c == ign:
                                cleaned_ignore_features.append(c)
                                keep_it = False
                                break
                        # end of for all feaures to remove
                        if keep_it:
                            cleaned_to_process.append(c)
                # end of new feature columns

                log.info(("writing DROPPED clean_file={} "
                          "features_to_process={} "
                          "ignore_features={} "
                          "predict={}")
                         .format(clean_file,
                                 cleaned_to_process,
                                 cleaned_ignore_features,
                                 feature_to_predict))

                write_clean_df = clean_df.drop(
                    columns=cleaned_ignore_features
                )
                log.info(("cleaned_df rows={}")
                         .format(len(write_clean_df.index)))
                write_clean_df.to_csv(
                         clean_file,
                         sep=',',
                         encoding='utf-8',
                         index=False)

                clean_metadata_file = "{}/cleaned_{}".format(
                    "/".join(clean_file.split("/")[:-1]),
                    metadata_filename)
                log.info(("writing clean metadata file={}")
                         .format(clean_metadata_file))
                header_data = {"headers": list(write_clean_df.columns.values),
                               "output_type": "clean",
                               "pipeline_files": pipeline_files,
                               "post_proc_rules": post_proc_rules,
                               "label_rules": label_rules,
                               "features_to_process": cleaned_to_process,
                               "feature_to_predict": feature_to_predict,
                               "ignore_features": cleaned_ignore_features,
                               "created": rnow()}
                with open(clean_metadata_file, "w") as otfile:
                    otfile.write(str(ppj(header_data)))
            else:

                for d in df.columns.values:
                    add_this_one = True
                    for i in ignore_features:
                        if d == i:
                            add_this_one = False
                            break
                    if add_this_one:
                        features_to_process.append(d)
                # for all df columns we're not ignoring...
                # add them as features to process

                fulldata_metadata_file = "{}/fulldata_{}".format(
                    "/".join(fulldata_file.split("/")[:-1]),
                    metadata_filename)
                log.info(("writing fulldata metadata file={}")
                         .format(fulldata_metadata_file))
                header_data = {"headers": list(df.columns.values),
                               "output_type": "fulldata",
                               "pipeline_files": pipeline_files,
                               "post_proc_rules": post_proc_rules,
                               "label_rules": label_rules,
                               "features_to_process": features_to_process,
                               "feature_to_predict": feature_to_predict,
                               "ignore_features": ignore_features,
                               "created": rnow()}

                with open(fulldata_metadata_file, "w") as otfile:
                    otfile.write(str(ppj(header_data)))

                # copy so appending the label does not mutate
                # features_to_process
                keep_these = list(features_to_process)
                keep_these.append(feature_to_predict)

                log.info(("creating new clean_file={} "
                          "keep_these={} "
                          "predict={}")
                         .format(clean_file,
                                 keep_these,
                                 feature_to_predict))

                # need to remove all columns that are all nan
                clean_df = df[keep_these].dropna(
                                axis=1, how='all').dropna()

                cleaned_features = clean_df.columns.values
                cleaned_to_process = []
                cleaned_ignore_features = []
                for c in cleaned_features:
                    if c == feature_to_predict:
                        cleaned_ignore_features.append(c)
                    else:
                        keep_it = True
                        for ign in ignore_features:
                            if c == ign:
                                cleaned_ignore_features.append(c)
                                keep_it = False
                                break
                        # end of for all features to remove
                        if keep_it:
                            cleaned_to_process.append(c)
                # end of new feature columns

                log.info(("writing DROPPED clean_file={} "
                          "features_to_process={} "
                          "ignore_features={} "
                          "predict={}")
                         .format(clean_file,
                                 cleaned_to_process,
                                 cleaned_ignore_features,
                                 feature_to_predict))

                write_clean_df = clean_df.drop(
                    columns=cleaned_ignore_features
                )
                log.info(("cleaned_df rows={}")
                         .format(len(write_clean_df.index)))
                write_clean_df.to_csv(
                         clean_file,
                         sep=',',
                         encoding='utf-8',
                         index=False)

                clean_metadata_file = "{}/cleaned_{}".format(
                    "/".join(clean_file.split("/")[:-1]),
                    metadata_filename)
                log.info(("writing clean metadata file={}")
                         .format(clean_metadata_file))
                header_data = {"headers": list(write_clean_df.columns.values),
                               "output_type": "clean",
                               "pipeline_files": pipeline_files,
                               "post_proc_rules": post_proc_rules,
                               "label_rules": label_rules,
                               "features_to_process": cleaned_to_process,
                               "feature_to_predict": feature_to_predict,
                               "ignore_features": cleaned_ignore_features,
                               "created": rnow()}
                with open(clean_metadata_file, "w") as otfile:
                    otfile.write(str(ppj(header_data)))

            # end of if/else

            save_node["clean_file"] = clean_file
            save_node["clean_metadata_file"] = clean_metadata_file

            log.info(("done writing clean_file={}")
                     .format(clean_file))
        # end of post_proc_rules

        save_node["fulldata_file"] = fulldata_file
        save_node["fulldata_metadata_file"] = fulldata_metadata_file

        save_node["status"] = VALID
    # end of writing the file

    save_node["features_to_process"] = features_to_process
    save_node["feature_to_predict"] = feature_to_predict
    save_node["ignore_features"] = ignore_features

    log.info("build_csv - END")

    return save_node
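For context, here is a minimal sketch of loading the cleaned output back before training. The /tmp paths are the defaults from build_training_request in Example #29, not values produced by this snippet, and the metadata keys read below match the header_data dictionary written above:

import json

import pandas as pd

# assumed output locations - substitute the clean_file and
# clean_metadata_file paths logged by build_csv
clean_file = "/tmp/cleaned_attack_scans.csv"
clean_metadata_file = "/tmp/cleaned_metadata.json"

with open(clean_metadata_file, "r") as mfile:
    meta = json.loads(mfile.read())

# recover the feature layout recorded by build_csv
features_to_process = meta["features_to_process"]
feature_to_predict = meta["feature_to_predict"]

df = pd.read_csv(clean_file)
print(("rows={} features={} predict={}").format(
    len(df.index),
    features_to_process,
    feature_to_predict))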
Example #27
    log.info(("Done with msg_id={} result={}").format(body["msg_id"],
                                                      result.get()))

    # now that the message has been
    # sent to the celery ecomm worker
    # we can ack the message which
    # deletes it from the source queue
    # the message processor uses
    message.ack()


# end of relay_callback

# want to change where you're subscribing vs publishing?
sub_ssl_options = {}
sub_auth_url = ev("SUB_BROKER_URL",
                  "pyamqp://*****:*****@localhost:5672//")
pub_ssl_options = {}
pub_auth_url = ev("PUB_BROKER_URL", "redis://localhost:6379/0")

# start the message processor
msg_proc = MessageProcessor(name=name,
                            sub_auth_url=sub_auth_url,
                            sub_ssl_options=sub_ssl_options,
                            pub_auth_url=pub_auth_url,
                            pub_ssl_options=pub_ssl_options)

# configure where this is consuming:
queue = ev("CONSUME_QUEUE", "user.events.conversions")

# Relay Publish Hook - sending to Redis
# configure where handled messages are sent using a publish hook or auto-caching:
Example #28
    def test_rabbitmq_consuming(self):

        # Integration Test the Consuming Worker with 50,000 messages
        # This test just uses send_task for publishing
        num_to_consume = 50000
        num_sent = 0
        num_to_send = num_to_consume
        msgs_to_send = []

        msgs_by_id = {}

        not_done_publishing = True

        test_values = {"test_name": "large messages"}

        if len(msgs_to_send) == 0:
            while len(msgs_to_send) != num_to_send:
                test_msg = self.build_user_conversion_event_msg(test_values)
                msgs_to_send.append(test_msg)
                msgs_by_id[test_msg["msg_id"]] = False
        # end of building messages before slower publishing calls

        pub_auth_url = ev("RELAY_WORKER_BROKER_URL",
                          "pyamqp://*****:*****@localhost:5672//")
        path_to_config_module = "ecomm_app.ecommerce.celeryconfig_pub_sub"

        app = ecomm_app.ecommerce.tasks.get_celery_app(
                name="demo",
                auth_url=pub_auth_url,
                path_to_config_module=path_to_config_module)

        task_name = "ecomm_app.ecommerce.tasks.handle_user_conversion_events"

        source_id = {"msg_proc": ev("TEST_RELAY_NAME",
                                    "test_ecomm_relay")}
        result = None

        log.info(("Sending broker={}")
                 .format(app.conf.broker_url))

        while not_done_publishing:

            if (num_sent % 1000 == 0) and num_sent > 0:
                log.info(("Published {} for "
                          "{}/{} messages")
                         .format(get_percent_done(num_sent,
                                                  num_to_send),
                                 num_sent,
                                 num_to_send))
            # end of if print for tracing

            msg_body = None
            if num_sent < len(msgs_to_send):
                msg_body = msgs_to_send[num_sent]

            result = app.send_task(task_name, (msg_body, source_id))

            num_sent += 1

            if num_sent >= num_to_send:
                log.info(("Published {} ALL "
                          "{}/{} messages")
                         .format(get_percent_done(num_sent,
                                                  num_to_send),
                                 num_sent,
                                 num_to_send))

                not_done_publishing = False
            elif num_sent >= len(msgs_to_send):
                log.info(("Published {} all "
                          "{}/{} messages result={}")
                         .format(get_percent_done(num_sent,
                                                  len(msgs_to_send)),
                                 num_sent,
                                 num_to_send,
                                 result))

                not_done_publishing = False
            # if should stop

        # end of not_done_publishing

        assert(num_sent == num_to_consume)

        log.info("")
        os.system("list-queues.sh")
        log.info("")
Example #29
def build_training_request(
        csv_file=ev("CSV_FILE", "/tmp/cleaned_attack_scans.csv"),
        meta_file=ev("CSV_META_FILE", "/tmp/cleaned_metadata.json"),
        predict_feature=ev("PREDICT_FEATURE", "label_value"),
        ignore_features=[
            "label_name",
            "ip_src",  # need to make this an int
            "ip_dst",  # need to make this an int
            "eth_src",  # need to make this an int
            "eth_dst"  # need to make this an int
        ],
        seed=None,
        test_size=float(ev("TEST_SIZE", "0.20")),
        preproc_rules=None):
    """build_training_request

    :param csv_file: csv file built with prepare_dataset.py
    :param meta_file: metadata file built with prepare_dataset.py
    :param predict_feature: feature (column) to predict
    :param ignore_features: features to remove from the csv
                            before the split of test + train
                            data
    :param seed: integer to seed
    :param test_size: percent of records to split into test
                      vs train
    :param preproc_rules: future preprocessing rules hooks
    """

    last_step = "not started"
    res = {
        "status": INVALID,
        "err": "",
        "csv_file": csv_file,
        "meta_file": meta_file,
        "meta_data": None,
        "seed": None,
        "test_size": test_size,
        "predict_feature": predict_feature,
        "features_to_process": [],
        "ignore_features": ignore_features,
        "X_train": None,
        "X_test": None,
        "Y_train": None,
        "Y_test": None
    }

    try:

        last_step = ("building seed={}").format(seed)

        log.debug(last_step)

        use_seed = seed
        if not use_seed:
            use_seed = 9

        res["seed"] = np.random.seed(use_seed)

        last_step = ("Loading csv={}").format(csv_file)

        log.info(last_step)

        if not os.path.exists(csv_file):
            res["status"] = ERROR
            res["err"] = ("Unable to find csv_file={}").format(csv_file)
            log.error(res["err"])
            return res
        # end of checking for a valid csv file on disk

        if not os.path.exists(meta_file):
            res["status"] = ERROR
            res["err"] = ("Unable to find meta_file={}").format(meta_file)
            log.error(res["err"])
            return res
        # end of checking for a valid metadata file on disk

        # load csv file into pandas dataframe
        df = pd.read_csv(csv_file)

        features_to_process = []
        meta_data = {}

        try:
            last_step = ("opening metadata={}").format(meta_file)
            log.debug(last_step)
            with open(meta_file, "r") as mfile:
                meta_data = json.loads(mfile.read())
            res["meta_data"] = meta_data
            if "post_proc_rules" in meta_data:
                if "drop_columns" in meta_data["post_proc_rules"]:
                    log.debug(("Found drop_columns={}").format(
                        meta_data["post_proc_rules"]["drop_columns"]))
                    for ign in meta_data["post_proc_rules"]["drop_columns"]:
                        ignore_features.append(ign)
        except Exception as e:
            res["error"] = ("Failed building ignore_features: "
                            "ignore_features={} meta={} meta_data={} "
                            "last_step='{}' ex='{}'").format(
                                ignore_features, meta_file, meta_data,
                                last_step, e)
            log.error(res["error"])
            res["status"] = ERROR
            return res
        # end of trying to lookup the meta data file
        # for non-int/float features to ignore

        last_step = ("metadata={} df has "
                     "columns={} ignore={}").format(meta_file,
                                                    df.columns.values,
                                                    ignore_features)

        log.info(last_step)

        for feature in df.columns.values:
            keep_it = True
            for ign in ignore_features:
                if feature == ign:
                    keep_it = False
            if keep_it:
                if feature != predict_feature:
                    features_to_process.append(feature)
        # end of for all features to process

        last_step = ("Done post-procecessing "
                     "Predicting={} with features={} "
                     "ignore_features={} records={}").format(
                         predict_feature, features_to_process, ignore_features,
                         len(df.index))

        log.info(last_step)

        res["predict_feature"] = predict_feature

        res["ignore_features"] = []
        for k in ignore_features:
            if k not in res["ignore_features"]:
                res["ignore_features"].append(k)
        res["features_to_process"] = []
        for k in features_to_process:
            if k not in res["features_to_process"]:
                if k != predict_feature:
                    res["features_to_process"].append(k)

        # split the data into training
        (res["X_train"], res["X_test"], res["Y_train"],
         res["Y_test"]) = train_test_split(df[features_to_process],
                                           df[predict_feature],
                                           test_size=test_size,
                                           random_state=res["seed"])

        last_step = ("Done splitting rows={} into "
                     "X_train={} X_test={} "
                     "Y_train={} Y_test={}").format(len(df.index),
                                                    len(res["X_train"]),
                                                    len(res["X_test"]),
                                                    len(res["Y_train"]),
                                                    len(res["Y_test"]))

        log.info(("Success: {}").format(last_step))

        res["err"] = ""
        res["status"] = VALID
    except Exception as e:
        res["status"] = ERROR
        res["err"] = ("Failed build_training_request "
                      "step='{}' with ex='{}'").format(last_step, e)
        log.error(("build_training_request: {}").format(res["err"]))
    # end of try/ex

    return res
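A minimal usage sketch for the function above, assuming the default /tmp files exist and that VALID is the same success constant checked inside build_training_request:

res = build_training_request()
if res["status"] == VALID:
    log.info(("ready to train rows: X_train={} X_test={} "
              "features={}")
             .format(len(res["X_train"]),
                     len(res["X_test"]),
                     res["features_to_process"]))
else:
    log.error(("failed building training request err={}")
              .format(res["err"]))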
Example #30
class BaseTestCase(unittest.TestCase):

    debug = False

    exchange_name = ev("TEST_EXCHANGE", "test.events")
    queue_name = ev("TEST_QUEUE", "test.events.conversions")
    routing_key = ev("TEST_ROUTING_KEY", "test.events.conversions")

    exchange = None
    queue = None

    rabbitmq_auth_url = ev("TEST_RABBITMQ_BROKER_URL",
                           "pyamqp://*****:*****@localhost:5672//")
    redis_auth_url = ev("TEST_REDIS_BROKER_URL", "redis://localhost:6379/0")

    pub_auth_url = rabbitmq_auth_url
    sub_auth_url = rabbitmq_auth_url

    pub_ssl_options = {}
    sub_ssl_options = {}

    pub_attrs = {}
    sub_attrs = {}

    pub_serializer = "json"
    sub_serializer = "application/json"

    test_id = str(uuid.uuid4()).replace("-", "")

    test_body = {"account_id": 123,
                 "subscription_id": 456,
                 "stripe_id": 789,
                 "product_id": "ABC"}

    pub_msgs = []
    sub_msgs = []

    last_pub_msg = None
    last_sub_msg = None
    last_sub_callback = None

    def setUp(self):
        if self.debug:
            print("setUp")

        # state trips in the custom classes
        os.environ["TEST_STOP_DONE"] = "1"

        self.last_pub_msg = None
        self.last_sub_msg = None
        self.pub = None
        self.sub = None
        self.pub_msgs = []
        self.sub_msgs = []

        self.exchange_name = ev("TEST_EXCHANGE", "test.events")
        self.routing_key = ev("TEST_ROUTING_KEY", "test.events.conversions")
        self.queue_name = ev("TEST_QUEUE", "test.events.conversions")

        self.exchange = None
        self.queue = None
        self.last_sub_callback = None

    # end of setUp

    def tearDown(self):
        if self.debug:
            print("tearDown")
        self.pub = None
        self.sub = None
        self.exchange = None
        self.queue = None
        self.last_sub_callback = None
    # end of tearDown

    def handle_message(self,
                       body,
                       msg):

        log.info(("test={} BASETEST handle_message got "
                  "body={} msg={}")
                 .format(self.test_id,
                         body,
                         msg))

        if msg:
            msg.ack()
    # end of handle_message

    def connect_pub(self,
                    auth_url=None,
                    ssl_options={},
                    attrs={}):

        use_auth_url = self.pub_auth_url
        use_ssl_options = self.pub_ssl_options
        use_pub_attrs = self.pub_attrs

        if auth_url:
            use_auth_url = auth_url
        if len(ssl_options) > 0:
            use_ssl_options = ssl_options
        if len(attrs) > 0:
            use_pub_attrs = attrs

        self.pub = Publisher("test-pub",
                             use_auth_url,
                             use_ssl_options)

    # end of connect_pub

    def connect_sub(self,
                    auth_url=None,
                    ssl_options={},
                    attrs={}):

        use_auth_url = self.sub_auth_url
        use_ssl_options = self.sub_ssl_options
        use_sub_attrs = self.sub_attrs

        if auth_url:
            use_auth_url = auth_url
        if len(ssl_options) > 0:
            use_ssl_options = ssl_options
        if len(attrs) > 0:
            use_sub_attrs = attrs

        self.sub = KombuSubscriber("test-sub",
                                   use_auth_url,
                                   use_ssl_options)
    # end of connect_sub

    def build_msg(self,
                  test_values={}):

        body = {"test_id": self.test_id,
                "date": datetime.datetime.now().strftime("%Y-%m-%d %H-%M-%S"),
                "msg_id": str(uuid.uuid4()).replace("-", ""),
                "test_values": test_values}

        return body
    # end of build_msg

    def consume(self,
                callback=None,
                queue=queue,
                exchange=exchange,
                routing_key=routing_key,
                serializer="application/json",
                heartbeat=60,
                time_to_wait=5.0,
                forever=False,
                silent=True):

        if not callback:
            log.error(("Subscriber - Requires a callback handler for message"
                       "processing with signature definition: "
                       "def handle_message(self, body, message):")
                      .format(self.sub_auth_url,
                              self.sub_ssl_options))
            assert(callback)

        # if not connected, just connect with defaults
        if not self.sub:
            self.connect_sub()
            if not self.sub:
                log.error(("Subscriber - Failed to connect "
                           "to broker={} ssl={}")
                          .format(self.sub_auth_url,
                                  self.sub_ssl_options))
                assert(self.sub)

        if self.sub:

            self.sub.consume(callback=callback,
                             queue=queue,
                             exchange=exchange,
                             routing_key=routing_key,
                             serializer=serializer,
                             heartbeat=heartbeat,
                             forever=forever,
                             time_to_wait=time_to_wait,
                             silent=silent)

        else:
            log.info("Sub is None already - client should not call consume")
    # end of consume

    def publish(self,
                body=None,
                exchange=exchange,
                routing_key=routing_key,
                queue=queue,
                priority=0,
                ttl=None,
                serializer="json",
                retry=True,
                silent=True):

        # if no body for the message
        if not body:
            log.error(("Publisher - requires argument: "
                       "body=some_dictionary to test"))
            assert(body)

        # if not connected, just connect with defaults
        if not self.pub:
            self.connect_pub()
            if not self.pub:
                log.error(("Publisher - Failed to connect "
                           "to broker={} ssl={}")
                          .format(self.pub_auth_url,
                                  self.pub_ssl_options))
                assert(self.pub)

        if self.pub:
            self.pub.publish(body=body,
                             exchange=exchange,
                             routing_key=routing_key,
                             queue=queue,
                             serializer=serializer,
                             priority=priority,
                             ttl=ttl,
                             retry=retry,
                             silent=silent)
        else:
            log.info("Pub is None already - client should not call publish")