Code example #1
 def setUp(self):
     schema_file = os.path.dirname(os.path.realpath(__file__)) + \
             "/TCCDMDatum.avsc"
     self.reader_schema = Utils.load_schema(schema_file)
     self.writer_schema = Utils.load_schema(schema_file)
     self.serializer = AvroGenericSerializer(self.writer_schema)
     self.deserializer = AvroGenericDeserializer(self.reader_schema)
Code example #2
    def __init__(self, conf, reset=False):
        """Set @reset to True to begin consuming at start of stream."""
        config = dict()
        self.topic = conf['kafka']['topic']
        config['bootstrap.servers'] = conf['kafka']['address']

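        # Start from the earliest available offset and auto-commit consumed
        # offsets.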
        default_topic_config = {}
        default_topic_config["auto.offset.reset"] = "smallest"
        default_topic_config['enable.auto.commit'] = True
        config["default.topic.config"] = default_topic_config

        # Set the group ID.
        state = self._get_state_info(conf)
        if not reset and state:
            group_id = state['group_id']
        else:
            group_id = "CG_" + str(uuid.uuid4())
            self._update_state(conf, 'group_id', group_id)
        config["group.id"] = group_id

        # Add SSL stuff
        if conf['kafka'].getboolean('ssl_enable'):
            config["security.protocol"] = 'ssl'
            config["ssl.ca.location"] = conf['kafka']['ca_path']
            config["ssl.certificate.location"] = conf['kafka']['cert_path']
            config["ssl.key.location"] = conf['kafka']['key_path']
            config["ssl.key.password"] = conf['kafka']['password']

        self.consumer = confluent_kafka.Consumer(config)
        self.consumer.subscribe([self.topic])

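        # The same schema file serves as both the producer (writer) and
        # consumer (reader) schema for the deserializer.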
        p_schema = Utils.load_schema(conf['kafka']['schema'])
        c_schema = Utils.load_schema(conf['kafka']['schema'])
        self.deserializer = KafkaAvroGenericDeserializer(c_schema, p_schema)
Code example #3
 def test_pretty_node_long(self):
     schema = self.writer_schema
     artifact_node = Utils.create_node(1, "Artifact", schema)
     artifact_node["properties"] = {}
     artifact_node["properties"]["path"] = \
             "/tmp/this/is/a/long/path/and/it/should/get/broken/up.txt"
     self.assertTrue("..." in Utils.pretty_node(artifact_node))
Code example #4
 def setUp(self):
     union_schema_file = os.path.dirname(os.path.realpath(__file__)) + \
             "/LabeledGraph.avsc"
     self.reader_schema = Utils.load_schema(union_schema_file)
     self.writer_schema = Utils.load_schema(union_schema_file)
     self.serializer = AvroGenericSerializer(self.writer_schema)
     self.deserializer = AvroGenericDeserializer(self.reader_schema,
                                                 self.writer_schema)
Code example #5
    def setUp(self):
        union_schema_file = os.path.dirname(os.path.realpath(__file__)) + \
                "/LabeledGraph.avsc"
        self.union_schema = Utils.load_schema(union_schema_file)

        nested_schema_file = os.path.dirname(os.path.realpath(__file__)) + \
                "/LabeledEdge.avsc"
        self.nested_schema = Utils.load_schema(nested_schema_file)
Code example #6
 def test_bad_edge_label(self):
     schema = self.writer_schema
     node1 = Utils.create_node(1, "unitOfExecution", schema)
     node2 = Utils.create_node(2, "agent", schema)
     bad_label = "badEdgeLabel"
     bad_edge = Utils.create_edge(node1, node2, bad_label, schema)
     is_valid = Utils.validate(schema, bad_edge)
     self.assertFalse(is_valid)
Code example #7
 def test_optional_field_absent(self):
     schema = self.writer_schema
     node1 = Utils.create_node(1, "unitOfExecution", schema, True)
     node2 = Utils.create_node(2, "agent", schema, True)
     edge1 = Utils.create_edge(node1, node2, "wasAssociatedWith", schema)
     serialized_edge = self.serializer.serialize_to_bytes(edge1)
     deserialized_edge = \
             self.deserializer.deserialize_bytes(serialized_edge)
     self.assertTrue(deserialized_edge["properties"] is None)
Code example #8
    def serialization_test_helper(self, schema, is_union):
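        # Round-trip records through Avro files on disk and compare the
        # deserialized copies with the originals.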
        node_file_path = os.path.dirname(os.path.realpath(__file__)) + \
                "/testNodes.avro"
        edge_file_path = os.path.dirname(os.path.realpath(__file__)) + \
                "/testEdges.avro"

        # Create some nodes and an edge.
        node1 = Utils.create_node(1, "unitOfExecution", schema, True)
        node2 = Utils.create_node(2, "artifact", schema, True)
        edge1 = Utils.create_edge(node1, node2, "read", schema)

        if is_union:
            # Serialize the nodes and edge to files.
            with open(node_file_path, "wb") as node_file:
                self.serializer = AvroGenericSerializer(
                    self.writer_schema, node_file)
                self.serializer.serialize_to_file([node1, node2])
                self.serializer.close_file_serializer()

        with open(edge_file_path, "wb") as edge_file:
            self.serializer = AvroGenericSerializer(self.writer_schema,
                                                    edge_file)
            self.serializer.serialize_to_file([edge1])
            self.serializer.close_file_serializer()

        if is_union:
            # Deserialize from the files to records.
            with open(node_file_path, "rb") as node_file:
                self.deserializer = AvroGenericDeserializer(
                    self.reader_schema, self.writer_schema, node_file)
                deserialized_nodes = \
                        self.deserializer.deserialize_from_file()
                self.deserializer.close_file_deserializer()

        with open(edge_file_path, "rb") as edge_file:
            self.deserializer = AvroGenericDeserializer(
                self.reader_schema, self.writer_schema, edge_file)
            deserialized_edges = \
                    self.deserializer.deserialize_from_file()
            self.deserializer.close_file_deserializer()

        if is_union:
            # Check the deserialized nodes.
            self.assertTrue(len(deserialized_nodes) == 2)
            self.compare_nodes(node1, deserialized_nodes[0])
            self.compare_nodes(node2, deserialized_nodes[1])

        # Check the deserialized edges.
        self.assertTrue(len(deserialized_edges) == 1)
        self.compare_edges(edge1, deserialized_edges[0])

        if is_union:
            # Clean up the files
            os.remove(node_file_path)

        os.remove(edge_file_path)
Code example #9
    def test_serialization_nested(self):
        schema_file = os.path.dirname(os.path.realpath(__file__)) + \
                "/LabeledEdge.avsc"
        self.writer_schema = Utils.load_schema(schema_file)
        schema_file = os.path.dirname(os.path.realpath(__file__)) + \
                "/LabeledEdgev2.avsc"
        self.reader_schema = Utils.load_schema(schema_file)

        self.serializer = AvroGenericSerializer(self.writer_schema)
        self.deserializer = AvroGenericDeserializer(self.reader_schema,
                                                    self.writer_schema)
        self.serialization_test_helper(self.writer_schema, False)
Code example #10
    def setUp(self):
        schema_file = os.path.dirname(os.path.realpath(__file__)) + \
                "/LabeledGraph.avsc"
        schema = Utils.load_schema(schema_file)

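        # Use the union schema for both reading and writing, and pull the
        # node and edge record schemas out of it by name.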
        self.reader_schema = schema
        self.writer_schema = schema
        self.node_schema = Utils.get_schema_by_name(
            self.writer_schema, TestUnionSchema._NODE_SCHEMA_FULLNAME)
        self.edge_schema = Utils.get_schema_by_name(
            self.writer_schema, TestUnionSchema._EDGE_SCHEMA_FULLNAME)

        self.serializer = AvroGenericSerializer(self.writer_schema)
        self.deserializer = AvroGenericDeserializer(self.reader_schema,
                                                    self.writer_schema)
Code example #11
 def test_serialization_union(self):
     schema_file = os.path.dirname(os.path.realpath(__file__)) + \
             "/LabeledGraphv2.avsc"
     self.reader_schema = Utils.load_schema(schema_file)
     self.deserializer = AvroGenericDeserializer(self.reader_schema,
                                                 self.writer_schema)
     self.serialization_test_helper(self.writer_schema, True)
Code example #12
    def __init__(
            self, kafka_server, group_id, topic, duration, consume_all,
            consumer_schema_filename, producer_schema_filename, auto_offset,
            security_protocol=None, ca_cert=None, cert_location=None,
            key_location=None, key_pass=None, session_timeout=_DEFAULT_SESSION_TIMEOUT_MS):
        """Create a simple consumer.

        :param kafka_server: Connection string for bootstrap Kafka server.
        :param group_id: Group ID to use for distributed consumers.
        :param topic: Topic to consume from.
        :param duration: Duration to run for.
        :param consumer_schema_filename: Filename for consumer schema.
        :param producer_schema_filename: Filename for producer schema.
        :param auto_offset: Offset reset method to use for consumers.
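        :param consume_all: Whether to consume all available records.
        :param security_protocol: Security protocol to use ("ssl" or
            "plaintext").
        :param ca_cert: CA certificate path (ssl only).
        :param cert_location: Client certificate path (ssl only).
        :param key_location: Client private key path (ssl only).
        :param key_pass: Client private key password (ssl only).
        :param session_timeout: Kafka session timeout in milliseconds.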
        """
        super(Consumer, self).__init__()
        self.kafka_server = kafka_server
        self.group_id = group_id
        self.topic = topic
        self.duration = duration
        self.consume_all = consume_all
        self.consumer_schema_filename = consumer_schema_filename
        self.producer_schema_filename = producer_schema_filename
        self.serializer = KafkaAvroGenericSerializer(self.consumer_schema_filename)
        self.deserializer = KafkaAvroGenericDeserializer(
                self.consumer_schema_filename, self.producer_schema_filename)
        self.auto_offset = auto_offset
        self.consume_timeout = Consumer._DEFAULT_CONSUME_TIMEOUT

        # Handle a sigint shutdown cleanly.
        self._shutdown = False

        config = {}
        config["bootstrap.servers"] = self.kafka_server
        config["group.id"] = self.group_id
        config["session.timeout.ms"] = session_timeout

        if security_protocol:
            if security_protocol.lower() == "ssl":
                config["security.protocol"] = security_protocol
                config["ssl.ca.location"] = ca_cert
                config["ssl.certificate.location"] = cert_location
                config["ssl.key.location"] = key_location
                config["ssl.key.password"] = key_pass
            elif security_protocol.lower() == "plaintext":
                config["security.protocol"] = security_protocol
            else:
                msg = "Unsupported security protocol type for TC APIs: " + security_protocol
                raise ValueError(msg)

        default_topic_config = {}
        default_topic_config["auto.offset.reset"] = self.auto_offset
        config["default.topic.config"] = default_topic_config

        self.consumer = confluent_kafka.Consumer(config)
        self.consumer.subscribe([self.topic])
        self.latency_stats = Utils.Stats(
                1, "End-to-End Latency (including Avro serialization)", "ms")
Code example #13
    def test_serialization(self):
        node1 = Utils.create_node(1, "unitOfExecution", True, self.schema)
        node2 = Utils.create_node(2, "artifact", True, self.schema)
        edge = Utils.create_edge(node1, node2, "read", True, self.schema)

        # Make sure serialization and deserialization is symmetric.
        json_edge = self.serializer.serialize_to_json(edge, True)
        deserialized_edge = self.deserializer.deserialize_json(json_edge)
        self.assertTrue(edge == deserialized_edge)

        # Make sure that the serializer can serialize to both bytes and json
        # without corrupting any internal state.  Also, test without making
        # the json serialization prettified.
        edge = Utils.create_edge(node1, node2, "modified", True, self.schema)
        self.serializer.serialize_to_bytes(edge)
        json_edge = self.serializer.serialize_to_json(edge)
        deserialized_edge = self.deserializer.deserialize_json(json_edge)
        self.assertTrue(edge == deserialized_edge)
Code example #14
 def _run_record_type_test(self, generator, expected_value):
     parser = parsing.CDMParser(self.reader_schema)
     for i in range(20):
         record = generator.generate_random_record(
             TestCDMTypeParsing._KV_PAIRS)
         self.assertTrue(Utils.validate(self.writer_schema, record))
         self.assertTrue(parser.get_record_type(record) == expected_value)
         serialized = self.serializer.serialize_to_bytes(record)
         deserialized = self.deserializer.deserialize_bytes(serialized)
         self.assertTrue(
             parser.get_record_type(deserialized) == expected_value)
Code example #15
    def serialization_test_helper(self, schema, is_union):
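        # Round-trip records through the byte serializer/deserializer and
        # compare the results with the originals.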
        node1 = Utils.create_node(1, "unitOfExecution", schema, True)
        node2 = Utils.create_node(2, "artifact", schema, True)
        edge1 = Utils.create_edge(node1, node2, "read", schema)

        if is_union:
            serialized_node1 = self.serializer.serialize_to_bytes(node1)
            deserialized_node1 = \
                    self.deserializer.deserialize_bytes(serialized_node1)
            self.compare_nodes(node1, deserialized_node1)

            serialized_node2 = self.serializer.serialize_to_bytes(node2)
            deserialized_node2 = \
                    self.deserializer.deserialize_bytes(serialized_node2)
            self.compare_nodes(node2, deserialized_node2)

        serialized_edge1 = self.serializer.serialize_to_bytes(edge1)
        deserialized_edge1 = \
                self.deserializer.deserialize_bytes(serialized_edge1)
        self.compare_edges(edge1, deserialized_edge1)
Code example #16
    def test_union_schema(self):
        schema = self.writer_schema
        node1 = Utils.create_node(1, "unitOfExecution", schema, True)
        node2 = Utils.create_node(2, "agent", schema, True)
        edge1 = Utils.create_edge(node1, node2, "wasAssociatedWith", schema,
                                  True)

        serialized_node = self.serializer.serialize_to_bytes(node1)
        serialized_edge = self.serializer.serialize_to_bytes(edge1)

        deserialized_node = self.deserializer.deserialize_bytes(
            serialized_node)
        deserialized_edge = self.deserializer.deserialize_bytes(
            serialized_edge)

        # Don't convert these to strings, like in the Java code.  That results
        # in differences due to unicode keys in the Avro-deserialized records
        # under Python 2.7, and we don't really want to deal with that.
        self.assertTrue(node1 == deserialized_node)
        self.assertTrue(edge1 == deserialized_edge)
Code example #17
        sys.exit(-1)
    if args['kafka_topic'] is None:
        print('Argument --kafka-topic is required')
        sys.exit(-1)
    if args['kafka_group'] is None:
        print('Argument --kafka-group is required')
        sys.exit(-1)

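    # Use a managed, balanced consumer that auto-commits its offsets once
    # per second and resumes from the last committed offset.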
    kafka_client = KafkaClient(input_url)
    kafka_topic = kafka_client.topics[args['kafka_topic']]
    consumer = kafka_topic.get_balanced_consumer(
                consumer_group=args['kafka_group'], auto_commit_enable=True,
                auto_commit_interval_ms=1000, reset_offset_on_start=False,
                consumer_timeout_ms=100, fetch_wait_max_ms=0, managed=True)
    
    schema = Utils.load_schema(SCHEMA_FILE)
    deserializer = KafkaAvroGenericDeserializer(schema, schema)
    parser = CDMParser(schema)
    
    f = consumer
elif input_source == 'file':
    if input_format == 'avro':
        ifile = open(input_url, 'rb')
        schema = Utils.load_schema(SCHEMA_FILE)
        deserializer = KafkaAvroGenericDeserializer(schema, input_file=ifile)
        parser = CDMParser(schema)
        f = deserializer.deserialize_from_file()
    elif input_format == 'json':
        f = open(input_url, 'r')

# process records
Code example #18
def main():
    parser = get_arg_parser()
    args = parser.parse_args()

    fileConfig("logging.conf")
    if args.v:
        logging.getLogger("tc").setLevel(logging.DEBUG)

    logger = logging.getLogger("tc")

    max_records = args.mr

    # Load the avro schema
    p_schema = Utils.load_schema(args.psf)

    # Kafka topic to publish to
    topic = args.topic

    # My producer ID
    producer_id = args.pid

    # Security protocol: use --sp if provided, otherwise keep the
    # module-level default
    security_protocol = args.sp if args.sp is not None else SECURITY_PROTOCOL

    # Initialize an avro serializer
    serializer = KafkaAvroGenericSerializer(p_schema)

    # Initialize a random record generator based on the given schema
    edgeGen = RecordGeneratorFactory.get_record_generator(serializer)

    # Set up the config for the Kafka producer
    config = {}
    config["bootstrap.servers"] = args.ks
    config["api.version.request"] = True
    config["client.id"] = args.pid

    if SECURITY_PROTOCOL.lower() == "ssl":
        config["security.protocol"] = SECURITY_PROTOCOL
        config["ssl.ca.location"] = CA_LOCATION
        config["ssl.certificate.location"] = CERT_LOCATION
        config["ssl.key.location"] = KEY_LOCATION
        config["ssl.key.password"] = KEY_PASS
    elif SECURITY_PROTOCOL.lower() == "plaintext":
        config["security.protocol"] = SECURITY_PROTOCOL
    else:
        msg = "Unsupported security protocol type for TC APIs: " + SECURITY_PROTOCOL
        raise ValueError(msg)

    producer = confluent_kafka.Producer(config)

    logger.info("Starting producer.")

    jsonout = open(args.f + ".json", 'w')
    binout = open(args.f + ".bin", 'wb')

    # Create a file writer and serialize all provided records to it.
    file_serializer = AvroGenericSerializer(p_schema, binout)

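    # Generate random records, publish each one to Kafka, and also write it
    # to the JSON and binary output files.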
    for i in range(args.mr):
        edge = edgeGen.generate_random_record(args.n)

        # Provide a key for the record; this determines which partition the
        # record goes to.
        kafka_key = str(i).encode()

        # Serialize the record
        message = serializer.serialize(topic, edge)

        if logger.isEnabledFor(logging.DEBUG):
            msg = "Attempting to send record k: {key}, value: {value}" \
                .format(key=kafka_key, value=edge)
            logger.debug(msg)

        producer.produce(topic, value=message, key=kafka_key)

        # serialize_to_json
        json = serializer.serialize_to_json(edge)
        jsonout.write(json + "\n")

        # write to binary file
        file_serializer.serialize_to_file(edge)

        if args.delay > 0:
            time.sleep(args.delay)

        producer.poll(0)

    producer.flush()
    jsonout.close()

    file_serializer.close_file_serializer()

    logger.info("Wrote " + str(args.mr) + " records to " + args.f)
Code example #19
 def test_record_type(self):
     generator = record_generator.CDMEventGenerator(self.serializer)
     record = generator.generate_random_record(5)
     parser = parsing.CDMParser(self.reader_schema)
     self.assertTrue(Utils.validate(self.writer_schema, record))
     self.assertTrue(parser.get_union_branch_type(record) == "Event")
Code example #20
def main():
    """ Run the cli tool. """
    # Create an argument parser and get the argument values.
    parser = get_arg_parser()
    args = parser.parse_args()
    producer = None
    consumer = None

    # Provided duration argument is in seconds -- convert it to milliseconds.
    duration_ms = args.d * 1000

    # Set log config, taking into account verbosity argument.
    fileConfig("logging.conf")
    if args.v:
        logging.getLogger("tc").setLevel(logging.DEBUG)
    if args.vv:
        logging.addLevelName(kafka.LOG_TRACE, "TRACE")
        logging.getLogger("tc").setLevel(kafka.LOG_TRACE)

    logger = logging.getLogger("tc")

    max_records = args.mr
    max_mb = args.mb

    p_schema = Utils.load_schema(args.psf)
    producer = Producer(args.ks,
                        args.pid,
                        args.topic,
                        args.async,
                        duration_ms,
                        args.delay,
                        None,
                        p_schema,
                        args.n,
                        args.sp,
                        args.ca,
                        args.cl,
                        args.kl,
                        args.kp,
                        skip_validate=(not args.ev))
    recordSize = -1

    if max_mb > 0 or max_records > 0:
        edgeGen = RecordGeneratorFactory.get_record_generator(
            producer.serializer)
        logger.info("Num kv pairs: " + str(args.n))
        recordSize = edgeGen.get_average_record_size(args.n)
        msg = "Serialized record size for {n} pairs: {size}".format(
            n=args.n, size=recordSize)
        logger.info(msg)

        if max_mb > 0:
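            # Convert the MB budget into a record count using the measured
            # average serialized record size.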
            max_records_by_mb = (max_mb * 1024 * 1024) / recordSize
            msg = "Max Records by MB: {maxbymb}".format(
                maxbymb=max_records_by_mb)
            logger.info(msg)

            if max_records == -1:
                max_records = max_records_by_mb
            else:
                # We have both maxMB defined and maxRecords, pick the min
                if max_records > max_records_by_mb:
                    max_records = max_records_by_mb

        msg = "Max records: {maxr}".format(maxr=max_records)
        logger.info(msg)

    # Run a single producer if we weren't asked to disable it.
    if not args.np:
        producer.throughput_stats_init(max_mb, max_records, recordSize,
                                       args.sr)
        if args.noavro:
            producer.set_no_avro()

        if args.ev:
            logger.info("Explicit Validate on")

        producer.start()

    # Run a single consumer if we weren't asked to disable it.
    if not args.nc:
        c_schema = Utils.load_schema(args.csf)
        p_schema = Utils.load_schema(args.psf)
        consumer = Consumer(args.ks, args.g, args.topic, duration_ms,
                            args.call, c_schema, p_schema, args.co, args.sp,
                            args.ca, args.cl, args.kl, args.kp)
        consumer.throughput_stats_init(max_mb, max_records, recordSize)
        consumer.start()

    # Wait for the producer and consumer to complete, but periodically check
    # for an interrupt.
    if producer is not None:
        try:
            while producer.isAlive():
                producer.join(1)
        except KeyboardInterrupt:
            producer.shutdown()
    if consumer is not None:
        try:
            while consumer.isAlive():
                consumer.join(1)
        except KeyboardInterrupt:
            consumer.shutdown()
Code example #21
def main():
    parser = get_arg_parser()
    args = parser.parse_args()

    fileConfig("logging.conf")
    if args.v:
        logging.getLogger("tc").setLevel(logging.DEBUG)

    logger = logging.getLogger("tc")

    max_records = args.mr

    # Load the avro schema
    p_schema = Utils.load_schema(args.psf)
    c_schema = Utils.load_schema(args.csf)

    # Kafka topic to consume from
    topic = args.topic

    # My consumer group ID
    group_id = args.g

    # Initialize an avro deserializer
    deserializer = KafkaAvroGenericDeserializer(c_schema, p_schema)

    config = {}
    config["bootstrap.servers"] = args.ks
    config["group.id"] = group_id
    if args.sp.lower() == "ssl":
        config["security.protocol"] = args.sp
        config["ssl.ca.location"] = args.ca
        config["ssl.certificate.location"] = args.cl
        config["ssl.key.location"] = args.kl
        config["ssl.key.password"] = args.kp
    elif args.sp.lower() == "plaintext":
        config["security.protocol"] = args.sp
    else:
        msg = "Unsupported security protocol: " + args.sp
        logger.error(msg)
        sys.exit(1)

    default_topic_config = {}
    default_topic_config["auto.offset.reset"] = "earliest"
    config["default.topic.config"] = default_topic_config

    consumer = confluent_kafka.Consumer(config)
    consumer.subscribe([topic])

    logger.info("Starting Consumer.")

    jsonout = open(args.f + ".json", 'w')
    count = 0

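    # Poll until the requested number of records has been consumed, writing
    # each deserialized record to the JSON output file.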
    while count < args.mr:
        kafka_message = consumer.poll(1)

        if kafka_message and not kafka_message.error():
            message = deserializer.deserialize(topic, kafka_message.value())
            count += 1

            if logger.isEnabledFor(logging.DEBUG):
                msg = "Consumed record k: {key}, value: {value}" \
                        .format(key=kafka_message.key(), value=message)
                logger.debug(msg)

            jsonout.write(str(message) + "\n")

            if args.delay > 0:
                time.sleep(args.delay)

        elif (not kafka_message or kafka_message.error().code() ==
              confluent_kafka.KafkaError.REQUEST_TIMED_OUT):
            logger.debug("Consumer timeout reached.")

        elif (kafka_message.error().code() ==
              confluent_kafka.KafkaError._PARTITION_EOF):
            logger.debug("End of partition reached.")

        elif (kafka_message.error().code() ==
              confluent_kafka.KafkaError.OFFSET_OUT_OF_RANGE):
            logger.debug("Offset out of range.")

    consumer.close()
    jsonout.close()

    logger.info("Wrote " + str(count) + " records to " + args.f)
Code example #22
 def test_bad_node_label(self):
     schema = self.writer_schema
     bad_label = "badNodeLabel"
     bad_node = Utils.create_node(1, bad_label, schema)
     is_valid = Utils.validate(schema, bad_node)
     self.assertFalse(is_valid)
Code example #23
def main():
    numnodes = 0
    numedges = 0
    numtags = 0
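    # Track the earliest and latest event timestamps seen (in microseconds).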
    minTS = sys.maxint
    maxTS = 0
    parser = get_arg_parser()
    args = parser.parse_args()
    fileConfig("logging.conf")
    if args.v:
        logging.getLogger("tc").setLevel(logging.DEBUG)

    logger = logging.getLogger("tc")

    # Load the avro schema
    p_schema = Utils.load_schema(args.psf)

    # The cdm parser
    cdmparser = CDMParser(p_schema)

    # Initialize an avro deserializer over the input file
    rfile = open(args.f, 'rb')
    deserializer = KafkaAvroGenericDeserializer(p_schema, input_file=rfile)

    fname = os.path.basename(rfile.name)

    if args.s:
        # Create the graph
        dot = Digraph(name=fname, comment="CDM DOT digraph", engine=args.e)
        # default attributes we add
        dot.graph_attr['rankdir'] = 'RL'
        dot.node_attr['fontname'] = "Helvetica"
        dot.node_attr['fontsize'] = "7"
        dot.node_attr['margin'] = "0.0,0.0"
        dot.edge_attr['fontname'] = "Helvetica"
        dot.edge_attr['fontsize'] = "6"
        dot.node_attr['style'] = "filled"
        if args.c:
            dot.graph_attr['overlap'] = "False"
            dot.graph_attr['splines'] = "True"
            #dot.node_attr['style'] = ""
        # attributes specified by the user
        setProperties(args.G, dot.graph_attr)
        setProperties(args.N, dot.node_attr)
        setProperties(args.E, dot.edge_attr)
        # some debugging
        logger.debug(dot.graph_attr)
        logger.debug(dot.node_attr)
        logger.debug(dot.edge_attr)

    records = deserializer.deserialize_from_file()

    i = 0
    for edge in records:
        i = i + 1
        rtype = cdmparser.get_record_type(edge)

        logger.debug("parsed record of type " + rtype)
        logger.debug(edge)
        datum = edge["datum"]

        if rtype == 'SimpleEdge':
            fuuid = repr(datum["fromUuid"])
            tuuid = repr(datum["toUuid"])
            if args.s:
                dot.edge(fuuid,
                         tuuid,
                         constraint='false',
                         style=(datum['type'] in NodeEdgeShapesColors
                                and NodeEdgeShapesColors[datum['type']][0]
                                or NodeEdgeShapesColors[rtype][0]),
                         color=(datum['type'] in NodeEdgeShapesColors
                                and NodeEdgeShapesColors[datum['type']][1]
                                or NodeEdgeShapesColors[rtype][1]))
            numedges += 1

        elif rtype == 'ProvenanceTagNode' or rtype == 'TagEntity':
            numtags += 1
            continue

        else:  # a node
            uuid = repr(datum["uuid"])
            descr = ('\n' + getRecordAttributeDesciption(rtype, datum)
                     ) if not args.c else ''
            shape = NodeEdgeShapesColors[rtype][0]
            color = NodeEdgeShapesColors[rtype][1]
            if args.s:
                dot.node(uuid,
                         label=rtype + ":" + uuid[len(uuid) - 6:] + descr,
                         shape=shape,
                         color=color)
            if rtype == 'Event' and 'timestampMicros' in datum:
                ts = datum['timestampMicros']
                if not isValidTimestamp(ts):
                    print('Warning: invalid timestamp ' + str(ts))
                else:
                    if ts > 0 and ts < minTS: minTS = ts
                    if ts > maxTS: maxTS = ts
            numnodes += 1

    rfile.close()
    logger.info(minTS)
    logger.info(maxTS)
    tSpanMicros = 1.0 * (maxTS - minTS)
    tSpanSec = tSpanMicros / 1e6
    logger.info("Read " + str(i) + " records {" + str(numnodes) + " nodes, " +
                str(numedges) + " edges, " + str(numtags) + " tags}")
    if tSpanSec > 0:
        logger.info("Event duration: " + str(tSpanMicros) + " micros, " +
                    str(tSpanSec) + " sec " +
                    (str(tSpanSec / 3600.0) +
                     " hrs" if tSpanSec > 3600 else ""))
    else:
        logger.info(
            "Event timestamps are not available, can't determine run duration")
    # render the graph
    if args.s: dot.render(args.f + '.' + args.e + '.gv', view=True)
Code example #24
 def setUp(self):
     schema_file = os.path.dirname(os.path.realpath(__file__)) + \
             "/LabeledEdge.avsc"
     self.schema = Utils.load_schema(schema_file)
     self.serializer = AvroGenericSerializer(self.schema)
     self.deserializer = AvroGenericDeserializer(self.schema, self.schema)
Code example #25
 def test_pretty_node_short(self):
     schema = self.writer_schema
     artifact_node = Utils.create_node(1, "Artifact", schema)
     artifact_node["properties"] = {}
     artifact_node["properties"]["path"] = "/dev/null"
     self.assertTrue("..." not in Utils.pretty_node(artifact_node))
Code example #26
def main():
    parser = get_arg_parser()
    args = parser.parse_args()

    fileConfig("logging.conf")
    if args.v:
        logging.getLogger("tc").setLevel(logging.DEBUG)

    logger = logging.getLogger("tc")

    # Load the avro schema
    p_schema = Utils.load_schema(args.psf)

    # Kafka topic to publish to
    topic = args.topic

    # My producer ID
    producer_id = args.pid

    # Initialize an avro serializer and a file-based deserializer

    rfile = open(args.f, 'rb')
    serializer = KafkaAvroGenericSerializer(p_schema,
                                            skip_validate=not args.ev)
    deserializer = KafkaAvroGenericDeserializer(p_schema, input_file=rfile)

    # Set up the config for the Kafka producer
    config = {}
    config["bootstrap.servers"] = args.ks
    config["api.version.request"] = True
    config["client.id"] = args.pid

    if args.sp.lower() == "ssl":
        config["security.protocol"] = args.sp
        config["ssl.ca.location"] = args.ca
        config["ssl.certificate.location"] = args.cl
        config["ssl.key.location"] = args.kl
        config["ssl.key.password"] = args.kp
    elif args.sp.lower() == "plaintext":
        config["security.protocol"] = args.sp
    else:
        msg = "Unsupported security protocol: " + args.sp
        logger.error(msg)
        sys.exit(1)

    producer = confluent_kafka.Producer(config)

    logger.info("Starting producer.")

    records = deserializer.deserialize_from_file()

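    # Replay every record from the input Avro file onto the Kafka topic,
    # keying each message by its index.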
    i = 0
    for edge in records:
        # Provide a key for the record; this determines which partition the
        # record goes to.
        kafka_key = str(i).encode()
        i = i + 1

        # Serialize the record
        message = serializer.serialize(topic, edge)

        if logger.isEnabledFor(logging.DEBUG):
            msg = "Attempting to send record k: {key}, value: {value}" \
                .format(key=kafka_key, value=edge)
            logger.debug(msg)

        producer.produce(topic, value=message, key=kafka_key)
        producer.poll(0)

    producer.flush()
    rfile.close()
    logger.info("Wrote " + str(i) + " records to " + str(topic))