def kibana_handle_schema_change(
        tenant: str,
        alias_name: str,
        schema_old: Mapping[Any, Any],
        schema_new: Mapping[Any, Any],
        subscription: Mapping[str, Any],  # Subscription.definition
        es_index: Mapping[Any, Any],
        es_conn,
        kibana_conn):
    """Decide whether a schema change requires a Kibana update and apply it.

    Returns False when the schemas are not substantially different or no
    update is required; otherwise returns the result of update_kibana_index.
    """
    node_new = Node(schema_new)
    kibana_index = make_kibana_index(alias_name, node_new)
    # prefer the old schema's name when one is available
    schema_name = schema_new.get('name')
    if schema_old is not None:
        if schema_old.get('name'):
            schema_name = schema_old.get('name')
        if Node.compare(Node(schema_old), node_new) == {}:
            return False  # schema not substantially different
    needs_update = check_for_kibana_update(
        schema_name, tenant, alias_name, subscription,
        kibana_index, es_index, es_conn, kibana_conn)
    if not needs_update:
        return False
    return update_kibana_index(
        tenant, alias_name, schema_new, subscription,
        kibana_index, es_index, es_conn, kibana_conn)
def get_es_types_from_schema(schema: Node):
    """Map the field types found in `schema` to their ES types.

    Since we handle union types, we sort these in increasing importance
    to ES's handling of them. I.E. if it can be an object or a string,
    it's more tolerant to treat it as an object, etc.
    """
    mappings = {}
    # One (type table, query keyword, attribute name) triple per type
    # family: basic avro, logical avro, and aether extended types.
    # Order matters: later families refine earlier ones, exactly as the
    # three original back-to-back loops did.
    type_queries = (
        (config.AVRO_TYPES, 'attr_contains', 'avro_type'),
        (config.AVRO_LOGICAL_TYPES, 'attr_contains', 'logical_type'),
        (config.AETHER_TYPES, 'match_attr', '__extended_type'),
    )
    for type_table, query_kw, attr_name in type_queries:
        for src_type, es_type in type_table:
            # find_children already yields the paths; no need to copy
            # through an identity comprehension
            matches = list(schema.find_children(
                {query_kw: [{attr_name: src_type}]}))
            __handle_mapping_addition(matches, mappings, src_type, es_type)
    return mappings
def auto_visualizations(
        alias_name: str,
        alias_index: str,
        node: Node,
        subscription: Mapping[str, Any],  # Subscription.definition
        path_filters: List[Callable[[str], bool]] = __default_path_filters()
) -> Dict[str, Any]:
    """Generate automatic visualizations for every supported field type.

    Returns a dict of {visualization_id: visualization_body}. Paths
    rejected by any predicate in `path_filters` are skipped.
    """
    LOG.debug(f'Getting visualizations for {alias_name}')
    visualizations = {}
    title_template = '{alias} {form_name} ({field_name} -> {vis_type})'
    id_template = '{alias}_{form_name}_{field_name}_{vis_type}'
    for _type in _supported_types():
        # the matching paths depend only on _type, not on the handler,
        # so compute them once instead of per (vis_type, fn) pair
        if _type in AETHER_TYPES:
            paths = list(node.find_children(
                {'match_attr': [{'__extended_type': _type}]}))
        elif _type in AVRO_TYPES:
            paths = list(node.find_children(
                {'attr_contains': [{'avro_type': _type}]}))
        else:
            # previously `paths` was left stale (or unbound) for a type
            # in neither set; such a type contributes nothing
            paths = []
        for vis_type, fn in _vis_for_type(_type):
            for path in paths:
                # predicate variable renamed so it cannot be confused
                # with the handler `fn` above
                if path_filters and not all(flt(path) for flt in path_filters):
                    LOG.debug(f'{path} ignored for visualization (filtered).')
                    continue
                LOG.debug(f'visualizing path -> {path}')
                form_name = index_handler.get_formname(path)
                field_name = index_handler.remove_formname(path)
                title = title_template.format(
                    alias=alias_name.capitalize(),
                    form_name=form_name,
                    field_name=field_name,
                    vis_type=vis_type.capitalize())
                _id = id_template.format(
                    alias=alias_name,
                    form_name=form_name.lower(),
                    field_name=field_name.lower(),
                    vis_type=vis_type.lower())
                res = fn(title=title,
                         alias=alias_index,
                         field_name=field_name,
                         node=node.get_node(path),
                         subscription=subscription)
                visualizations[_id] = res
    return visualizations
def find_path_in_schema(self, schema: Node, test):
    """Return paths of schema children whose name matches `test`.

    Paths are made relative to the schema root: the leading
    '<root-name>.' prefix is stripped when present.
    """
    _base_name = f'{schema.name}.'
    # the comprehension already yields [] when nothing matches, so the
    # old `matches if matches else []` fallback was redundant
    return [
        i[len(_base_name):] if i.startswith(_base_name) else i
        for i in schema.find_children({'match_attr': [{'name': test}]})
    ]
def test__comparison_nested_attr(ComplexSchema):
    """A changed nested attribute shows up in the compare diff."""
    left = deepcopy(ComplexSchema)
    right = deepcopy(ComplexSchema)
    path = 'operator_type'
    # change a node's attribute
    left.children[path].__lookup = [{"something": "else"}]
    # I don't always trust deepcopy...
    assert left.children[path].__lookup != right.children[path].__lookup
    diff = Node.compare(right, left)
    assert any(path in key for key in diff.keys())
def __init__(self,
             schema: Mapping[Any, Any] = None,
             node: Node = None,
             raw_schema: str = None):
    """Build a generator from one of: a schema dict, a Node, or raw JSON.

    Precedence when several are given: node, then schema, then raw_schema.

    Raises:
        ValueError: if no source argument is provided at all.
    """
    # compare against None explicitly so a falsy-but-present value
    # (e.g. an empty mapping) is not silently treated as "missing"
    if schema is None and node is None and raw_schema is None:
        raise ValueError(
            'Must include one of: schema (dict) node (Node) or raw_schema (JSON)'
        )
    if node is not None:
        schema = node
    elif schema is not None:
        schema = Node(schema)
    else:
        schema = Node(json.loads(raw_schema))
    self._base_name = schema.name
    self.load_defaults()
    self.schema = schema
    self.spavro_schema = parse_schema(self.schema._source)
def schema_defined_visualizations(
        alias_name: str,
        alias_index: str,
        node: Node,
        subscription: Mapping[str, Any],  # Subscription.definition
) -> Dict[str, Any]:
    """Create visualizations only for fields that carry a
    __default_visualization attribute in the schema.

    Returns a dict of {visualization_id: visualization_body}.
    """
    visualizations = {}
    title_template = '{alias} {form_name} ({field_name} -> {vis_type})'
    id_template = '{alias}_{form_name}_{field_name}_{vis_type}'
    paths = list(node.find_children({'has_attr': ['__default_visualization']}))
    LOG.debug(f'schemas found at paths {paths}')
    for path in paths:
        target_node = node.get_node(path)
        vis_name = target_node.__default_visualization
        if vis_name not in SCHEMA_VIS_MAP:
            LOG.debug(
                f'@path: {path} has preferred type {vis_name}. No handler found'
            )
            continue
        vis_type, fn = SCHEMA_VIS_MAP.get(vis_name)
        LOG.debug(f'visualizing path -> {path}')
        form_name = index_handler.get_formname(path)
        field_name = index_handler.remove_formname(path)
        title = title_template.format(
            alias=alias_name.capitalize(),
            form_name=form_name,
            field_name=field_name,
            vis_type=vis_type.capitalize())
        _id = id_template.format(
            alias=alias_name,
            form_name=form_name.lower(),
            field_name=field_name.lower(),
            vis_type=vis_type.lower())
        visualizations[_id] = fn(
            title=title,
            alias=alias_index,
            field_name=field_name,
            node=target_node,
            subscription=subscription)
    return visualizations
def test__process_geo_field():
    """Geopoint detection and doc processing work for both schema flavors."""
    cases = [
        (TYPE_INSTRUCTIONS, AUTOGEN_SCHEMA, SAMPLE_DOC, 'autogen'),
        (TYPE_INSTRUCTIONS, SIMPLE_SCHEMA, SAMPLE_DOC2, 'simple'),
    ]
    for instructions, schema, doc, name in cases:
        processor = ESItemProcessor(name, instructions, Node(schema))
        geo = processor._find_geopoints()
        assert geo.get('lat') is not None
        processed = processor.process(doc)
        assert processed.get('geo_point').get('lon') is not None
def _format_lookups(schema: Node, default='Other', strip_form_name=True):
    """Collect lookup tables from schema nodes carrying a __lookup attribute.

    Keys are node paths; when strip_form_name is set the form-name
    prefix is removed from each key.
    """
    matching = schema.collect_matching({'has_attr': ['__lookup']})
    if not matching:
        return {}
    # choose the key transform once instead of duplicating the dict build
    key_of = remove_formname if strip_form_name else (lambda key: key)
    return {
        key_of(key): _format_single_lookup(node, default)
        for key, node in matching
    }
def _find_timestamp(schema: Node):
    """Pick the field to index time on.

    Preference order: the configured `index_time` field if it exists in
    the schema, else the first dateTime field whose name contains
    'timestamp', else the first dateTime field, else the configured
    `auto_timestamp` fallback.
    """
    matching = schema.collect_matching(
        {'match_attr': [{'__extended_type': 'dateTime'}]})
    fields = sorted(remove_formname(key) for key, node in matching)
    preferred = consumer_config.get('es_options', {}).get('index_time', None)
    if fields and preferred in fields:
        return preferred
    timestamps = [field for field in fields if 'timestamp' in field]
    if timestamps:
        return timestamps[0]
    if fields:
        return fields[0]
    return consumer_config.get('es_options', {}).get('auto_timestamp', None)
def ComplexSchema():
    """Fixture: Node built from the annotated example schema."""
    schema_node = Node(ANNOTATED_SCHEMA)  # noqa
    return schema_node
def AutoGenSchema():
    """Fixture: Node built from the autogenerated example schema."""
    schema_node = Node(AUTOGEN_SCHEMA)  # noqa
    return schema_node
def SimpleSchema():
    """Fixture: Node built from the simple example schema."""
    schema_node = Node(SIMPLE_SCHEMA)  # noqa
    return schema_node
def _update_topic(self, topic, schema: Mapping[Any, Any]):
    """React to a (new or changed) schema for `topic`.

    Resolves the subscription and target ES index, registers the index
    and alias in Elasticsearch if it changed, lets Kibana reconcile its
    artifacts, and rebuilds the per-topic processor/route caches.
    Returns None; all results are stored on self._indices,
    self._doc_types, self._processors and self._routes.
    """
    self.log.debug(f'{self.tenant} is updating topic: {topic}')
    subscription = self._job_subscription_for_topic(topic)
    if not subscription:
        # nothing to do without a subscription; bail out early
        self.log.error(f'Could not find subscription for topic {topic}')
        return
    node: Node = Node(schema)
    self.log.debug('getting index')
    es_index = index_handler.get_es_index_from_subscription(
        subscription.definition.get('es_options'),
        name=self._name_from_topic(topic),
        tenant=self.tenant.lower(),
        schema=node
    )
    self.log.debug(f'index {es_index}')
    # an explicit alias_name in es_options wins over the schema namespace
    alias_request = subscription.definition.get('es_options', {}).get('alias_name')
    if alias_request:
        alias = f'{alias_request}'.lower()
    else:
        alias = index_handler.get_alias_from_namespace(node.namespace)
    # Try to add the indices / ES alias
    es_instance = self._job_elasticsearch().get_session()
    if index_handler.es_index_changed(es_instance, es_index, self.tenant):
        self.log.debug(f'{self.tenant} updated schema for {topic}')
        self.log.debug(f'registering ES index:\n{json.dumps(es_index, indent=2)}')
        index_handler.update_es_index(
            es_instance,
            es_index,
            self.tenant,
            alias
        )
    # hand the old/new schema pair to Kibana so it can decide whether
    # its index-pattern and visualizations need regeneration
    conn: KibanaInstance = self._job_kibana()
    old_schema = self._schemas.get(topic)
    updated_kibana = index_handler.kibana_handle_schema_change(
        self.tenant.lower(),
        alias,
        old_schema,
        schema,
        subscription.definition,
        es_index,
        es_instance,
        conn
    )
    if updated_kibana:
        self.log.info(
            f'Registered kibana index {alias} for {self.tenant}'
        )
    else:
        self.log.info(
            f'Kibana index {alias} did not need update.'
        )
    self._indices[topic] = es_index
    self.log.debug(f'{self.tenant}:{topic} | idx: {es_index}')
    # update processor for type
    # NOTE(review): assumes the index body has exactly one mapping entry;
    # only the first (doc_type, instructions) pair is used — confirm
    doc_type, instr = list(es_index['body']['mappings'].items())[0]
    self._doc_types[topic] = doc_type
    self._processors[topic] = ESItemProcessor(topic, instr, node)
    self._routes[topic] = self._processors[topic].create_route()
def test__comparison_unhandled():
    """compare_objects returns False for operands it has no handler for."""
    first = datetime.now()
    second = datetime.now()
    assert Node.compare_objects(first, second) is False
def test__other_init_methods():
    """SampleGenerator can be constructed from raw JSON or from a Node."""
    from_raw = SampleGenerator(raw_schema=json.dumps(EXAMPLE_SIMPLE_SCHEMA))
    assert from_raw.make_sample() is not None
    from_node = SampleGenerator(node=Node(EXAMPLE_SIMPLE_SCHEMA))
    assert from_node.make_sample() is not None
def merge_kibana_artifacts(
    tenant: str,
    alias_name: str,
    schema: Mapping[Any, Any],
    subscription: Mapping[str, Any],  # Subscription.definition
    kibana_index: Mapping[Any, Any],  # individual kibana index contribution
    kibana_conn,
    old_artifact: Mapping[Any, Any] = None  # artifact describes multiple types
):
    """Reconcile a schema's Kibana index contribution with prior state.

    Returns a (kibana_index, artifact, visualizations) triple:
    - no old artifact: the new index, a fresh artifact, all visualizations
    - nothing changed (same index hash, no new visuals): (None, None, None)
    - otherwise: the old index merged with the new contribution, an
      updated artifact, and only the changed visualizations.
    """
    schema_name = schema.get('name')
    index_hash = utils.hash(kibana_index)
    # TODO
    alias_index = f'{tenant}.{alias_name}'
    # kibana_options.auto_visualization selects the generation strategy:
    # 'full', 'schema' (only @aether_default_visualization), or none
    auto_vis_flag = subscription.get('kibana_options', {}).get('auto_visualization')
    if auto_vis_flag == 'full':
        LOG.info('Creating automatic visualizations')
        visualizations = auto_visualizations(alias_name,
                                             alias_index,
                                             Node(schema),
                                             subscription)
    elif auto_vis_flag == 'schema':
        LOG.info('Only creating vis from @aether_default_visualization')
        visualizations = schema_defined_visualizations(alias_name,
                                                       alias_index,
                                                       Node(schema),
                                                       subscription)
    else:
        LOG.info('Not creating visualizations')
        visualizations = {}
    # hash each visualization so change detection is cheap
    vis_hashes = {k: utils.hash(v) for k, v in visualizations.items()}
    if not old_artifact:
        # use the new one since there is no old one
        artifact = make_kibana_artifact(index={schema_name: index_hash},
                                        visualization=vis_hashes)
        return kibana_index, artifact, visualizations
    old_index_hash = old_artifact.get('hashes', {}).get('index', {}).get(schema_name)
    old_vis_hashes = old_artifact.get('hashes', {}).get('visualization', {})
    # a visualization counts as updated when its hash is unknown to the
    # old artifact (compared against values, regardless of key)
    updated_visuals = {
        key: visualizations[key]
        for key, _hash in vis_hashes.items()
        if _hash not in old_vis_hashes.values()
    }
    if updated_visuals:
        LOG.debug(f'updated visuals: {list(updated_visuals.keys())}')
    # no change, ignore
    if (old_index_hash == index_hash) and (len(updated_visuals) == 0):
        return None, None, None
    # we need to reconcile the update
    try:
        old_kibana_index = handle_kibana_artifact(alias_name,
                                                  tenant,
                                                  kibana_conn,
                                                  mode='READ',
                                                  _type='index-pattern')
    except (HTTPError, ConsumerHttpException) as her:
        # best-effort read: a missing old index just means we merge
        # into an empty dict
        LOG.info(f'Old Kibana index not found {her}')
        old_kibana_index = {}
    new_kibana_index = utils.merge_dicts(old_kibana_index,
                                         kibana_index)
    artifact = make_kibana_artifact(index={schema_name: index_hash},
                                    visualization=vis_hashes,
                                    old_artifact=old_artifact)
    return new_kibana_index, artifact, updated_visuals
def test__comparison_none(SimpleSchema):
    """A schema compared with itself yields an empty diff."""
    diff = Node.compare(SimpleSchema, SimpleSchema)
    assert diff == {}
def PolySchemaA():
    """Fixture: Node built from polymorphic schema variant A."""
    schema_node = Node(POLY_SCHEMA_A)  # noqa
    return schema_node
def SimpleSchema():
    """Fixture: Node built from the simple example schema."""
    schema_node = Node(EXAMPLE_SIMPLE_SCHEMA)
    return schema_node
def PolySchemaB():
    """Fixture: Node built from polymorphic schema variant B."""
    schema_node = Node(POLY_SCHEMA_B)  # noqa
    return schema_node
def ComplexSchema():
    """Fixture: Node built from the annotated example schema."""
    schema_node = Node(EXAMPLE_ANNOTATED_SCHEMA)
    return schema_node
def AutoSchema():
    """Fixture: Node built from the autogenerated example schema."""
    schema_node = Node(EXAMPLE_AUTOGEN_SCHEMA)
    return schema_node
def test__comparison_all(SimpleSchema):
    """Renaming the root node makes every path show up in the diff."""
    left = deepcopy(SimpleSchema)
    right = deepcopy(SimpleSchema)
    left.name = 'SomethingElse'  # change root node (changes all paths)
    diff = Node.compare(left, right)
    assert len(diff) == 15  # all nodes