def setUp(self):
    schema_file = os.path.dirname(os.path.realpath(__file__)) + \
        "/TCCDMDatum.avsc"
    self.reader_schema = Utils.load_schema(schema_file)
    self.writer_schema = Utils.load_schema(schema_file)
    self.serializer = AvroGenericSerializer(self.writer_schema)
    self.deserializer = AvroGenericDeserializer(self.reader_schema)
def __init__(self, conf, reset=False):
    """Set @reset to True to begin consuming at start of stream."""
    config = dict()
    self.topic = conf['kafka']['topic']
    config['bootstrap.servers'] = conf['kafka']['address']

    default_topic_config = {}
    default_topic_config["auto.offset.reset"] = "smallest"
    default_topic_config['enable.auto.commit'] = True
    config["default.topic.config"] = default_topic_config

    # Set the group ID.
    state = self._get_state_info(conf)
    if not reset and state:
        group_id = state['group_id']
    else:
        group_id = "CG_" + str(uuid.uuid4())
        self._update_state(conf, 'group_id', group_id)
    config["group.id"] = group_id

    # Add SSL stuff
    if conf['kafka'].getboolean('ssl_enable'):
        config["security.protocol"] = 'ssl'
        config["ssl.ca.location"] = conf['kafka']['ca_path']
        config["ssl.certificate.location"] = conf['kafka']['cert_path']
        config["ssl.key.location"] = conf['kafka']['key_path']
        config["ssl.key.password"] = conf['kafka']['password']

    self.consumer = confluent_kafka.Consumer(config)
    self.consumer.subscribe([self.topic])

    p_schema = Utils.load_schema(conf['kafka']['schema'])
    c_schema = Utils.load_schema(conf['kafka']['schema'])
    self.deserializer = KafkaAvroGenericDeserializer(c_schema, p_schema)
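# A minimal sketch of the configparser-style `conf` object the constructor
# above expects, based only on the keys it reads (topic, address, schema,
# ssl_enable, and the SSL paths). The host, topic, and file paths below are
# hypothetical placeholder values, not part of the original code.
import configparser

example_conf = configparser.ConfigParser()
example_conf.read_dict({
    "kafka": {
        "topic": "example-topic",               # topic to subscribe to
        "address": "localhost:9092",            # bootstrap.servers string
        "schema": "/path/to/TCCDMDatum.avsc",   # Avro schema file
        "ssl_enable": "no",                     # read via getboolean()
        "ca_path": "/path/to/ca.pem",
        "cert_path": "/path/to/cert.pem",
        "key_path": "/path/to/key.pem",
        "password": "changeme",
    }
})
# consumer = SomeConsumerClass(example_conf, reset=True)  # hypothetical class name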
def test_pretty_node_long(self):
    schema = self.writer_schema
    artifact_node = Utils.create_node(1, "Artifact", schema)
    artifact_node["properties"] = {}
    artifact_node["properties"]["path"] = \
        "/tmp/this/is/a/long/path/and/it/should/get/broken/up.txt"
    self.assertTrue("..." in Utils.pretty_node(artifact_node))
def setUp(self):
    union_schema_file = os.path.dirname(os.path.realpath(__file__)) + \
        "/LabeledGraph.avsc"
    self.reader_schema = Utils.load_schema(union_schema_file)
    self.writer_schema = Utils.load_schema(union_schema_file)
    self.serializer = AvroGenericSerializer(self.writer_schema)
    self.deserializer = AvroGenericDeserializer(self.reader_schema,
                                                self.writer_schema)
def setUp(self):
    union_schema_file = os.path.dirname(os.path.realpath(__file__)) + \
        "/LabeledGraph.avsc"
    self.union_schema = Utils.load_schema(union_schema_file)
    nested_schema_file = os.path.dirname(os.path.realpath(__file__)) + \
        "/LabeledEdge.avsc"
    self.nested_schema = Utils.load_schema(nested_schema_file)
def test_bad_edge_label(self):
    schema = self.writer_schema
    node1 = Utils.create_node(1, "unitOfExecution", schema)
    node2 = Utils.create_node(2, "agent", schema)
    bad_label = "badEdgeLabel"
    bad_edge = Utils.create_edge(node1, node2, bad_label, schema)
    is_valid = Utils.validate(schema, bad_edge)
    self.assertFalse(is_valid)
def test_optional_field_absent(self):
    schema = self.writer_schema
    node1 = Utils.create_node(1, "unitOfExecution", schema, True)
    node2 = Utils.create_node(2, "agent", schema, True)
    edge1 = Utils.create_edge(node1, node2, "wasAssociatedWith", schema)
    serialized_edge = self.serializer.serialize_to_bytes(edge1)
    deserialized_edge = \
        self.deserializer.deserialize_bytes(serialized_edge)
    self.assertTrue(deserialized_edge["properties"] is None)
def serialization_test_helper(self, schema, is_union):
    node_file_path = os.path.dirname(os.path.realpath(__file__)) + \
        "/testNodes.avro"
    edge_file_path = os.path.dirname(os.path.realpath(__file__)) + \
        "/testEdges.avro"

    # Create some nodes and an edge.
    node1 = Utils.create_node(1, "unitOfExecution", schema, True)
    node2 = Utils.create_node(2, "artifact", schema, True)
    edge1 = Utils.create_edge(node1, node2, "read", schema)

    if is_union:
        # Serialize the nodes and edge to files.
        with open(node_file_path, "wb") as node_file:
            self.serializer = AvroGenericSerializer(
                self.writer_schema, node_file)
            self.serializer.serialize_to_file([node1, node2])
            self.serializer.close_file_serializer()

    with open(edge_file_path, "wb") as edge_file:
        self.serializer = AvroGenericSerializer(self.writer_schema,
                                                edge_file)
        self.serializer.serialize_to_file([edge1])
        self.serializer.close_file_serializer()

    if is_union:
        # Deserialize from the files to records.
        with open(node_file_path, "rb") as node_file:
            self.deserializer = AvroGenericDeserializer(
                self.reader_schema, self.writer_schema, node_file)
            deserialized_nodes = \
                self.deserializer.deserialize_from_file()
            self.deserializer.close_file_deserializer()

    with open(edge_file_path, "rb") as edge_file:
        self.deserializer = AvroGenericDeserializer(
            self.reader_schema, self.writer_schema, edge_file)
        deserialized_edges = \
            self.deserializer.deserialize_from_file()
        self.deserializer.close_file_deserializer()

    if is_union:
        # Check the deserialized nodes.
        self.assertTrue(len(deserialized_nodes) == 2)
        self.compare_nodes(node1, deserialized_nodes[0])
        self.compare_nodes(node2, deserialized_nodes[1])

    # Check the deserialized edges.
    self.assertTrue(len(deserialized_edges) == 1)
    self.compare_edges(edge1, deserialized_edges[0])

    # Clean up the files.
    if is_union:
        os.remove(node_file_path)
    os.remove(edge_file_path)
def test_serialization_nested(self):
    schema_file = os.path.dirname(os.path.realpath(__file__)) + \
        "/LabeledEdge.avsc"
    self.writer_schema = Utils.load_schema(schema_file)
    schema_file = os.path.dirname(os.path.realpath(__file__)) + \
        "/LabeledEdgev2.avsc"
    self.reader_schema = Utils.load_schema(schema_file)
    self.serializer = AvroGenericSerializer(self.writer_schema)
    self.deserializer = AvroGenericDeserializer(self.reader_schema,
                                                self.writer_schema)
    self.serialization_test_helper(self.writer_schema, False)
def setUp(self):
    schema_file = os.path.dirname(os.path.realpath(__file__)) + \
        "/LabeledGraph.avsc"
    schema = Utils.load_schema(schema_file)
    self.reader_schema = schema
    self.writer_schema = schema
    self.node_schema = Utils.get_schema_by_name(
        self.writer_schema, TestUnionSchema._NODE_SCHEMA_FULLNAME)
    self.edge_schema = Utils.get_schema_by_name(
        self.writer_schema, TestUnionSchema._EDGE_SCHEMA_FULLNAME)
    self.serializer = AvroGenericSerializer(self.writer_schema)
    self.deserializer = AvroGenericDeserializer(self.reader_schema,
                                                self.writer_schema)
def test_serialization_union(self):
    schema_file = os.path.dirname(os.path.realpath(__file__)) + \
        "/LabeledGraphv2.avsc"
    self.reader_schema = Utils.load_schema(schema_file)
    self.deserializer = AvroGenericDeserializer(self.reader_schema,
                                                self.writer_schema)
    self.serialization_test_helper(self.writer_schema, True)
def __init__(
        self, kafka_server, group_id, topic, duration, consume_all,
        consumer_schema_filename, producer_schema_filename, auto_offset,
        security_protocol=None, ca_cert=None, cert_location=None,
        key_location=None, key_pass=None,
        session_timeout=_DEFAULT_SESSION_TIMEOUT_MS):
    """Create a simple consumer.

    :param kafka_server: Connection string for bootstrap Kafka server.
    :param group_id: Group ID to use for distributed consumers.
    :param topic: Topic to consume from.
    :param duration: Duration to run for.
    :param consumer_schema_filename: Filename for consumer schema.
    :param producer_schema_filename: Filename for producer schema.
    :param auto_offset: Offset reset method to use for consumers.
    """
    super(Consumer, self).__init__()
    self.kafka_server = kafka_server
    self.group_id = group_id
    self.topic = topic
    self.duration = duration
    self.consume_all = consume_all
    self.consumer_schema_filename = consumer_schema_filename
    self.producer_schema_filename = producer_schema_filename
    self.serializer = KafkaAvroGenericSerializer(
        self.consumer_schema_filename)
    self.deserializer = KafkaAvroGenericDeserializer(
        self.consumer_schema_filename, self.producer_schema_filename)
    self.auto_offset = auto_offset
    self.consume_timeout = Consumer._DEFAULT_CONSUME_TIMEOUT

    # Handle a sigint shutdown cleanly.
    self._shutdown = False

    config = {}
    config["bootstrap.servers"] = self.kafka_server
    config["group.id"] = self.group_id
    config["session.timeout.ms"] = session_timeout
    if security_protocol:
        if security_protocol.lower() == "ssl":
            config["security.protocol"] = security_protocol
            config["ssl.ca.location"] = ca_cert
            config["ssl.certificate.location"] = cert_location
            config["ssl.key.location"] = key_location
            config["ssl.key.password"] = key_pass
        elif security_protocol.lower() == "plaintext":
            config["security.protocol"] = security_protocol
        else:
            msg = "Unsupported security protocol type for TC APIs: " + \
                security_protocol
            raise ValueError(msg)

    default_topic_config = {}
    default_topic_config["auto.offset.reset"] = self.auto_offset
    config["default.topic.config"] = default_topic_config

    self.consumer = confluent_kafka.Consumer(config)
    self.consumer.subscribe([self.topic])

    self.latency_stats = Utils.Stats(
        1, "End-to-End Latency (including Avro serialization)", "ms")
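# A minimal usage sketch for the Consumer thread above, mirroring how the
# benchmark driver constructs and runs it (see main() further below). The
# bootstrap server, group ID, topic, and schema paths are hypothetical
# placeholders; the driver also calls throughput_stats_init() before start(),
# which is omitted here for brevity.
consumer = Consumer(
    "localhost:9092",             # kafka_server (placeholder)
    "example-group",              # group_id (placeholder)
    "example-topic",              # topic (placeholder)
    60 * 1000,                    # duration in milliseconds
    False,                        # consume_all
    "/path/to/TCCDMDatum.avsc",   # consumer schema (placeholder path)
    "/path/to/TCCDMDatum.avsc",   # producer schema (placeholder path)
    "earliest",                   # auto_offset
    security_protocol="plaintext")
consumer.start()
try:
    while consumer.isAlive():
        consumer.join(1)
except KeyboardInterrupt:
    consumer.shutdown()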
def test_serialization(self):
    node1 = Utils.create_node(1, "unitOfExecution", True, self.schema)
    node2 = Utils.create_node(2, "artifact", True, self.schema)
    edge = Utils.create_edge(node1, node2, "read", True, self.schema)

    # Make sure serialization and deserialization are symmetric.
    json_edge = self.serializer.serialize_to_json(edge, True)
    deserialized_edge = self.deserializer.deserialize_json(json_edge)
    self.assertTrue(edge == deserialized_edge)

    # Make sure that the serializer can serialize to both bytes and JSON
    # without corrupting any internal state. Also, test without
    # prettifying the JSON serialization.
    edge = Utils.create_edge(node1, node2, "modified", True, self.schema)
    self.serializer.serialize_to_bytes(edge)
    json_edge = self.serializer.serialize_to_json(edge)
    deserialized_edge = self.deserializer.deserialize_json(json_edge)
    self.assertTrue(edge == deserialized_edge)
def _run_record_type_test(self, generator, expected_value):
    parser = parsing.CDMParser(self.reader_schema)
    for i in range(20):
        record = generator.generate_random_record(
            TestCDMTypeParsing._KV_PAIRS)
        self.assertTrue(Utils.validate(self.writer_schema, record))
        self.assertTrue(parser.get_record_type(record) == expected_value)

        serialized = self.serializer.serialize_to_bytes(record)
        deserialized = self.deserializer.deserialize_bytes(serialized)
        self.assertTrue(
            parser.get_record_type(deserialized) == expected_value)
def serialization_test_helper(self, schema, is_union):
    node1 = Utils.create_node(1, "unitOfExecution", schema, True)
    node2 = Utils.create_node(2, "artifact", schema, True)
    edge1 = Utils.create_edge(node1, node2, "read", schema)

    if is_union:
        serialized_node1 = self.serializer.serialize_to_bytes(node1)
        deserialized_node1 = \
            self.deserializer.deserialize_bytes(serialized_node1)
        self.compare_nodes(node1, deserialized_node1)

        serialized_node2 = self.serializer.serialize_to_bytes(node2)
        deserialized_node2 = \
            self.deserializer.deserialize_bytes(serialized_node2)
        self.compare_nodes(node2, deserialized_node2)

    serialized_edge1 = self.serializer.serialize_to_bytes(edge1)
    deserialized_edge1 = \
        self.deserializer.deserialize_bytes(serialized_edge1)
    self.compare_edges(edge1, deserialized_edge1)
def test_union_schema(self):
    schema = self.writer_schema
    node1 = Utils.create_node(1, "unitOfExecution", schema, True)
    node2 = Utils.create_node(2, "agent", schema, True)
    edge1 = Utils.create_edge(node1, node2, "wasAssociatedWith", schema,
                              True)

    serialized_node = self.serializer.serialize_to_bytes(node1)
    serialized_edge = self.serializer.serialize_to_bytes(edge1)
    deserialized_node = self.deserializer.deserialize_bytes(
        serialized_node)
    deserialized_edge = self.deserializer.deserialize_bytes(
        serialized_edge)

    # Don't convert these to strings like the Java code does. Under
    # Python 2.7 that produces spurious differences, because keys in the
    # Avro-deserialized records come back as unicode strings, and we
    # don't really want to deal with that.
    self.assertTrue(node1 == deserialized_node)
    self.assertTrue(edge1 == deserialized_edge)
        sys.exit(-1)
    if args['kafka_topic'] is None:
        print('Argument --kafka-topic is required')
        sys.exit(-1)
    if args['kafka_group'] is None:
        print('Argument --kafka-group is required')
        sys.exit(-1)
    kafka_client = KafkaClient(input_url)
    kafka_topic = kafka_client.topics[args['kafka_topic']]
    consumer = kafka_topic.get_balanced_consumer(
        consumer_group=args['kafka_group'],
        auto_commit_enable=True,
        auto_commit_interval_ms=1000,
        reset_offset_on_start=False,
        consumer_timeout_ms=100,
        fetch_wait_max_ms=0,
        managed=True)
    schema = Utils.load_schema(SCHEMA_FILE)
    deserializer = KafkaAvroGenericDeserializer(schema, schema)
    parser = CDMParser(schema)
    f = consumer
elif input_source == 'file':
    if input_format == 'avro':
        ifile = open(input_url, 'rb')
        schema = Utils.load_schema(SCHEMA_FILE)
        deserializer = KafkaAvroGenericDeserializer(schema,
                                                    input_file=ifile)
        parser = CDMParser(schema)
        f = deserializer.deserialize_from_file()
    elif input_format == 'json':
        f = open(input_url, 'r')

# process records
def main():
    parser = get_arg_parser()
    args = parser.parse_args()

    fileConfig("logging.conf")
    if args.v:
        logging.getLogger("tc").setLevel(logging.DEBUG)
    logger = logging.getLogger("tc")

    max_records = args.mr

    # Load the avro schema
    p_schema = Utils.load_schema(args.psf)

    # Kafka topic to publish to
    topic = args.topic

    # My producer ID
    producer_id = args.pid

    # Security protocol
    if args.sp is not None:
        SECURITY_PROTOCOL = args.sp

    # Initialize an avro serializer
    serializer = KafkaAvroGenericSerializer(p_schema)

    # Initialize a random record generator based on the given schema
    edgeGen = RecordGeneratorFactory.get_record_generator(serializer)

    # Set up the config for the Kafka producer
    config = {}
    config["bootstrap.servers"] = args.ks
    config["api.version.request"] = True
    config["client.id"] = args.pid
    if SECURITY_PROTOCOL.lower() == "ssl":
        config["security.protocol"] = SECURITY_PROTOCOL
        config["ssl.ca.location"] = CA_LOCATION
        config["ssl.certificate.location"] = CERT_LOCATION
        config["ssl.key.location"] = KEY_LOCATION
        config["ssl.key.password"] = KEY_PASS
    elif SECURITY_PROTOCOL.lower() == "plaintext":
        config["security.protocol"] = SECURITY_PROTOCOL
    else:
        msg = "Unsupported security protocol type for TC APIs: " + \
            SECURITY_PROTOCOL
        raise ValueError(msg)

    producer = confluent_kafka.Producer(config)
    logger.info("Starting producer.")

    jsonout = open(args.f + ".json", 'w')
    binout = open(args.f + ".bin", 'wb')
    # Create a file writer and serialize all provided records to it.
    file_serializer = AvroGenericSerializer(p_schema, binout)

    for i in range(args.mr):
        edge = edgeGen.generate_random_record(args.n)

        # Provide a key for the record; this determines which partition
        # the record goes to.
        kafka_key = str(i).encode()

        # Serialize the record
        message = serializer.serialize(topic, edge)

        if logger.isEnabledFor(logging.DEBUG):
            msg = "Attempting to send record k: {key}, value: {value}" \
                .format(key=kafka_key, value=edge)
            logger.debug(msg)

        producer.produce(topic, value=message, key=kafka_key)

        # serialize_to_json
        json = serializer.serialize_to_json(edge)
        jsonout.write(json + "\n")

        # write to binary file
        file_serializer.serialize_to_file(edge)

        if args.delay > 0:
            time.sleep(args.delay)

        producer.poll(0)

    producer.flush()
    jsonout.close()
    file_serializer.close_file_serializer()
    logger.info("Wrote " + str(args.mr) + " records to " + args.f)
def test_record_type(self):
    generator = record_generator.CDMEventGenerator(self.serializer)
    record = generator.generate_random_record(5)
    parser = parsing.CDMParser(self.reader_schema)
    self.assertTrue(Utils.validate(self.writer_schema, record))
    self.assertTrue(parser.get_union_branch_type(record) == "Event")
def main(): """ Run the cli tool. """ # Create an argument parser and get the argument values. parser = get_arg_parser() args = parser.parse_args() producer = None consumer = None # Provided duration argument is in seconds -- convert it to milliseconds. duration_ms = args.d * 1000 # Set log config, taking into account verbosity argument. fileConfig("logging.conf") if args.v: logging.getLogger("tc").setLevel(logging.DEBUG) if args.vv: logging.addLevelName(kafka.LOG_TRACE, "TRACE") logging.getLogger("tc").setLevel(kafka.LOG_TRACE) logger = logging.getLogger("tc") max_records = args.mr max_mb = args.mb p_schema = Utils.load_schema(args.psf) producer = Producer(args.ks, args.pid, args.topic, args. async, duration_ms, args.delay, None, p_schema, args.n, args.sp, args.ca, args.cl, args.kl, args.kp, skip_validate=(not args.ev)) recordSize = -1 if max_mb > 0 or max_records > 0: edgeGen = RecordGeneratorFactory.get_record_generator( producer.serializer) logger.info("Num kv pairs: " + str(args.n)) recordSize = edgeGen.get_average_record_size(args.n) msg = "Serialized record size for {n} pairs: {size}".format( n=args.n, size=recordSize) logger.info(msg) if max_mb > 0: max_records_by_mb = (max_mb * 1024 * 1024) / recordSize msg = "Max Records by MB: {maxbymb}".format( maxbymb=max_records_by_mb) logging.info(msg) if max_records == -1: max_records = max_records_by_mb else: # We have both maxMB defined and maxRecords, pick the min if max_records > max_records_by_mb: max_records = max_records_by_mb msg = "Max records: {maxr}".format(maxr=max_records) logger.info(msg) # Run a single producer if we weren't asked to disable it. if not args.np: producer.throughput_stats_init(max_mb, max_records, recordSize, args.sr) if args.noavro: producer.set_no_avro() if args.ev: logger.info("Explicit Validate on") producer.start() # Run a single consumer if we weren't asked to disable it. if not args.nc: c_schema = Utils.load_schema(args.csf) p_schema = Utils.load_schema(args.psf) consumer = Consumer(args.ks, args.g, args.topic, duration_ms, args.call, c_schema, p_schema, args.co, args.sp, args.ca, args.cl, args.kl, args.kp) consumer.throughput_stats_init(max_mb, max_records, recordSize) consumer.start() # Wait for the producer and consumer to complete, but periodically check # for an interrupt. if producer is not None: try: while producer.isAlive(): producer.join(1) except KeyboardInterrupt: producer.shutdown() if consumer is not None: try: while consumer.isAlive(): consumer.join(1) except KeyboardInterrupt: consumer.shutdown()
def main():
    parser = get_arg_parser()
    args = parser.parse_args()

    fileConfig("logging.conf")
    if args.v:
        logging.getLogger("tc").setLevel(logging.DEBUG)
    logger = logging.getLogger("tc")

    max_records = args.mr

    # Load the avro schemas
    p_schema = Utils.load_schema(args.psf)
    c_schema = Utils.load_schema(args.csf)

    # Kafka topic to consume from
    topic = args.topic

    # My consumer group ID
    group_id = args.g

    # Initialize an avro deserializer
    deserializer = KafkaAvroGenericDeserializer(c_schema, p_schema)

    config = {}
    config["bootstrap.servers"] = args.ks
    config["group.id"] = group_id
    if args.sp.lower() == "ssl":
        config["security.protocol"] = args.sp
        config["ssl.ca.location"] = args.ca
        config["ssl.certificate.location"] = args.cl
        config["ssl.key.location"] = args.kl
        config["ssl.key.password"] = args.kp
    elif args.sp.lower() == "plaintext":
        config["security.protocol"] = args.sp
    else:
        msg = "Unsupported security protocol: " + args.sp
        logger.error(msg)
        sys.exit(1)

    default_topic_config = {}
    default_topic_config["auto.offset.reset"] = "earliest"
    config["default.topic.config"] = default_topic_config

    consumer = confluent_kafka.Consumer(config)
    consumer.subscribe([topic])

    logger.info("Starting Consumer.")
    jsonout = open(args.f + ".json", 'w')

    count = 0
    while count < args.mr:
        kafka_message = consumer.poll(1)
        if kafka_message and not kafka_message.error():
            message = deserializer.deserialize(topic,
                                               kafka_message.value())
            count += 1
            if logger.isEnabledFor(logging.DEBUG):
                msg = "Consumed record k: {key}, value: {value}" \
                    .format(key=kafka_message.key(), value=message)
                logger.debug(msg)
            jsonout.write(str(message) + "\n")
            if args.delay > 0:
                time.sleep(args.delay)
        elif (not kafka_message or kafka_message.error().code() ==
                confluent_kafka.KafkaError.REQUEST_TIMED_OUT):
            logger.debug("Consumer timeout reached.")
        elif (kafka_message.error().code() ==
                confluent_kafka.KafkaError._PARTITION_EOF):
            logger.debug("End of partition reached.")
        elif (kafka_message.error().code() ==
                confluent_kafka.KafkaError.OFFSET_OUT_OF_RANGE):
            logger.debug("Offset out of range.")

    consumer.close()
    jsonout.close()
    logger.info("Wrote " + str(count) + " records to " + args.f)
def test_bad_node_label(self):
    schema = self.writer_schema
    bad_label = "badNodeLabel"
    bad_node = Utils.create_node(1, bad_label, schema)
    is_valid = Utils.validate(schema, bad_node)
    self.assertFalse(is_valid)
def main():
    numnodes = 0
    numedges = 0
    numtags = 0
    minTS = sys.maxsize
    maxTS = 0

    parser = get_arg_parser()
    args = parser.parse_args()

    fileConfig("logging.conf")
    if args.v:
        logging.getLogger("tc").setLevel(logging.DEBUG)
    logger = logging.getLogger("tc")

    # Load the avro schema
    p_schema = Utils.load_schema(args.psf)

    # The cdm parser
    cdmparser = CDMParser(p_schema)

    # Initialize an avro deserializer over the input file
    rfile = open(args.f, 'rb')
    deserializer = KafkaAvroGenericDeserializer(p_schema, input_file=rfile)
    fname = os.path.basename(rfile.name)

    if args.s:
        # Create the graph
        dot = Digraph(name=fname, comment="CDM DOT digraph", engine=args.e)

        # default attributes we add
        dot.graph_attr['rankdir'] = 'RL'
        dot.node_attr['fontname'] = "Helvetica"
        dot.node_attr['fontsize'] = "7"
        dot.node_attr['margin'] = "0.0,0.0"
        dot.edge_attr['fontname'] = "Helvetica"
        dot.edge_attr['fontsize'] = "6"
        dot.node_attr['style'] = "filled"
        if args.c:
            dot.graph_attr['overlap'] = "False"
            dot.graph_attr['splines'] = "True"
            # dot.node_attr['style'] = ""

        # attributes specified by the user
        setProperties(args.G, dot.graph_attr)
        setProperties(args.N, dot.node_attr)
        setProperties(args.E, dot.edge_attr)

        # some debugging
        logger.debug(dot.graph_attr)
        logger.debug(dot.node_attr)
        logger.debug(dot.edge_attr)

    records = deserializer.deserialize_from_file()
    i = 0
    for edge in records:
        i = i + 1
        rtype = cdmparser.get_record_type(edge)
        logger.debug("parsed record of type " + rtype)
        logger.debug(edge)

        datum = edge["datum"]
        if rtype == 'SimpleEdge':
            fuuid = repr(datum["fromUuid"])
            tuuid = repr(datum["toUuid"])
            if args.s:
                dot.edge(fuuid, tuuid, constraint='false',
                         style=(datum['type'] in NodeEdgeShapesColors and
                                NodeEdgeShapesColors[datum['type']][0] or
                                NodeEdgeShapesColors[rtype][0]),
                         color=(datum['type'] in NodeEdgeShapesColors and
                                NodeEdgeShapesColors[datum['type']][1] or
                                NodeEdgeShapesColors[rtype][1]))
            numedges += 1
        elif rtype == 'ProvenanceTagNode' or rtype == 'TagEntity':
            numtags += 1
            continue
        else:
            # a node
            uuid = repr(datum["uuid"])
            descr = ('\n' + getRecordAttributeDesciption(rtype, datum)
                     ) if not args.c else ''
            shape = NodeEdgeShapesColors[rtype][0]
            color = NodeEdgeShapesColors[rtype][1]
            if args.s:
                dot.node(uuid,
                         label=rtype + ":" + uuid[len(uuid) - 6:] + descr,
                         shape=shape, color=color)
            if rtype == 'Event' and 'timestampMicros' in datum:
                ts = datum['timestampMicros']
                if not isValidTimestamp(ts):
                    print('Warning: invalid timestamp ' + str(ts))
                elif ts > 0:
                    if ts < minTS:
                        minTS = ts
                    if ts > maxTS:
                        maxTS = ts
            numnodes += 1

    rfile.close()

    logger.info(minTS)
    logger.info(maxTS)
    tSpanMicros = 1.0 * (maxTS - minTS)
    tSpanSec = tSpanMicros / 1e6
    logger.info("Read " + str(i) + " records {" + str(numnodes) +
                " nodes, " + str(numedges) + " edges, " + str(numtags) +
                " tags}")
    if tSpanSec > 0:
        logger.info("Event duration: " + str(tSpanMicros) + " micros, " +
                    str(tSpanSec) + " sec " +
                    (str(tSpanSec / 3600.0) + " hrs"
                     if tSpanSec > 3600 else ""))
    else:
        logger.info("Event timestamps are not available, "
                    "can't determine run duration")

    # render the graph
    if args.s:
        dot.render(args.f + '.' + args.e + '.gv', view=True)
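# NodeEdgeShapesColors is referenced above but defined elsewhere in the
# module. From how it is indexed, it appears to map a CDM record (or edge)
# type to a two-element tuple: index 0 is used as the Graphviz shape for
# nodes (and as the line style for edges), index 1 as the color. The entries
# below are hypothetical illustrative values, not the original mapping.
NodeEdgeShapesColors_example = {
    "Subject": ("box", "lightblue"),     # node type: (shape, color)
    "Event": ("ellipse", "lightyellow"),
    "SimpleEdge": ("solid", "gray"),     # edge type: (style, color)
}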
def setUp(self):
    schema_file = os.path.dirname(os.path.realpath(__file__)) + \
        "/LabeledEdge.avsc"
    self.schema = Utils.load_schema(schema_file)
    self.serializer = AvroGenericSerializer(self.schema)
    self.deserializer = AvroGenericDeserializer(self.schema, self.schema)
def test_pretty_node_short(self):
    schema = self.writer_schema
    artifact_node = Utils.create_node(1, "Artifact", schema)
    artifact_node["properties"] = {}
    artifact_node["properties"]["path"] = "/dev/null"
    self.assertTrue("..." not in Utils.pretty_node(artifact_node))
def main():
    parser = get_arg_parser()
    args = parser.parse_args()

    fileConfig("logging.conf")
    if args.v:
        logging.getLogger("tc").setLevel(logging.DEBUG)
    logger = logging.getLogger("tc")

    # Load the avro schema
    p_schema = Utils.load_schema(args.psf)

    # Kafka topic to publish to
    topic = args.topic

    # My producer ID
    producer_id = args.pid

    # Initialize an avro serializer and a file deserializer
    rfile = open(args.f, 'rb')
    serializer = KafkaAvroGenericSerializer(p_schema,
                                            skip_validate=not args.ev)
    deserializer = KafkaAvroGenericDeserializer(p_schema, input_file=rfile)

    # Set up the config for the Kafka producer
    config = {}
    config["bootstrap.servers"] = args.ks
    config["api.version.request"] = True
    config["client.id"] = args.pid
    if args.sp.lower() == "ssl":
        config["security.protocol"] = args.sp
        config["ssl.ca.location"] = args.ca
        config["ssl.certificate.location"] = args.cl
        config["ssl.key.location"] = args.kl
        config["ssl.key.password"] = args.kp
    elif args.sp.lower() == "plaintext":
        config["security.protocol"] = args.sp
    else:
        msg = "Unsupported security protocol: " + args.sp
        logger.error(msg)
        sys.exit(1)

    producer = confluent_kafka.Producer(config)
    logger.info("Starting producer.")

    records = deserializer.deserialize_from_file()
    i = 0
    for edge in records:
        # Provide a key for the record; this determines which partition
        # the record goes to.
        kafka_key = str(i).encode()
        i = i + 1

        # Serialize the record
        message = serializer.serialize(topic, edge)

        if logger.isEnabledFor(logging.DEBUG):
            msg = "Attempting to send record k: {key}, value: {value}" \
                .format(key=kafka_key, value=edge)
            logger.debug(msg)

        producer.produce(topic, value=message, key=kafka_key)
        producer.poll(0)

    producer.flush()
    rfile.close()
    logger.info("Wrote " + str(i) + " records to " + str(topic))