def create_stream_from_influxdb_discovery_fast(metric: str, column: Dict[str, Dict[str, Any]]) -> None:
    validated_columns = []
    found_columns = []
    errors = []
    for key, value in column.items():
        if key in found_columns:
            errors.append("The column '%s' is duplicated!" % key)
            continue
        if key == TIMESTAMP_COLUMN_NAME:
            errors.append("The column name '%s' is reserved!" % TIMESTAMP_COLUMN_NAME)
            continue
        found_columns.append(key)
        if value['isTag']:  # tags become non-nullable text columns
            validated_columns.append({'name': key, 'type': TEXT_INPUT, 'nullable': False, 'isTag': True})
        else:  # fields are mapped through the fast type switcher, defaulting to text
            next_type = INFLUXDB_FAST_SWITCHER.get(value['type'], TEXT_INPUT)
            validated_columns.append({'name': key, 'type': next_type, 'nullable': True, 'isTag': False})
    # the timestamp column is always appended last
    validated_columns.append({'name': TIMESTAMP_COLUMN_NAME, 'type': TIMESTAMP_WITH_TIMEZONE_TYPE_EXTERNAL,
                              'nullable': True, 'isTag': False})
    if len(errors) > 0:
        raise GuardianException(where=INFLUXDB_LINE_INSERT_VIOLATION, message=errors)
    (schema, stream) = metric.split('.')
    get_stream_cache().create_stream(schema, stream, validated_columns)
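
# A minimal usage sketch for the fast discovery path above, assuming a metric
# named 'weather.stations'. The keys accepted by INFLUXDB_FAST_SWITCHER are
# defined elsewhere; 'float' below is an illustrative assumption, not a
# confirmed switcher key.
#
# create_stream_from_influxdb_discovery_fast('weather.stations', {
#     'region': {'isTag': True, 'type': 'tag'},           # tag -> non-null TEXT, part of the primary key
#     'temperature': {'isTag': False, 'type': 'float'},   # field -> mapped through INFLUXDB_FAST_SWITCHER
# })
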
def delete_stream(self, schema: str, table: str) -> None:
    try:
        self._cursor.execute("DROP TABLE %s.%s" % (my_monet_escape(schema), my_monet_escape(table)))
        self._connection.commit()
    except BaseException as ex:
        self._connection.rollback()
        raise GuardianException(where=MAPI_CONNECTION_VIOLATION, message=ex.__str__())
def create_json_stream(schema: Dict[Any, Any]) -> None:
    found_columns = []
    errors = []
    for column in schema['columns']:
        next_name = column['name']
        if next_name in found_columns:
            errors.append("The column '%s' is duplicated!" % next_name)
            continue
        if next_name == TIMESTAMP_COLUMN_NAME:
            errors.append("The column name '%s' is reserved!" % TIMESTAMP_COLUMN_NAME)
            continue
        found_columns.append(next_name)
        column['isTag'] = False
    if 'tags' in schema:  # tags are appended as extra non-nullable text columns
        for tag in schema['tags']:
            if tag in found_columns:
                errors.append("The column '%s' is duplicated!" % tag)
                continue
            found_columns.append(tag)
            schema['columns'].append({'name': tag, 'type': TEXT_INPUT, 'nullable': False, 'isTag': True})
    if len(errors) > 0:
        raise GuardianException(where=JSON_SCHEMA_CREATE_VIOLATION, message=errors)
    # the timestamp column is always appended last
    schema['columns'].append({'name': TIMESTAMP_COLUMN_NAME, 'type': TIMESTAMP_WITH_TIMEZONE_TYPE_EXTERNAL,
                              'nullable': True, 'isTag': False})
    get_stream_cache().create_stream(schema['schema'], schema['stream'], schema['columns'])
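
# A hedged sketch of the payload create_json_stream expects, reconstructed
# from the lookups above; the column names and the 'type' value are
# illustrative assumptions.
#
# create_json_stream({
#     'schema': 'weather',
#     'stream': 'stations',
#     'columns': [{'name': 'temperature', 'type': 'real', 'nullable': True}],
#     'tags': ['region'],  # optional; each tag becomes a non-null TEXT column
# })
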
async def add_influxdb_lines(lines: str) -> None:
    (streams, found_errors) = use_antlr_parser(lines)
    await asyncio.wrap_future(THREAD_POOL.submit(insert_json_values, streams))
    if len(found_errors):
        raise GuardianException(where=INFLUXDB_LINE_INSERT_VIOLATION, message=found_errors)
def insert_points_via_insertinto(self, schema: str, stream: str, records: str) -> None:
    try:
        self._cursor.execute("INSERT INTO %s.%s VALUES %s" % (my_monet_escape(schema),
                                                              my_monet_escape(stream), records))
        self._connection.commit()
    except BaseException as ex:
        self._connection.rollback()
        raise GuardianException(where=MAPI_CONNECTION_VIOLATION, message=ex.__str__())
def insert_points_via_csv(self, metric_name: str, nrecords: int, records: str) -> None:
    insert_string = "COPY %d RECORDS INTO %s FROM STDIN;\n%%s" % (nrecords, my_monet_escape(metric_name))
    try:
        self._cursor.execute(insert_string % records)
        self._connection.commit()
    except BaseException as ex:
        self._connection.rollback()
        raise GuardianException(where=MAPI_CONNECTION_VIOLATION, message=ex.__str__())
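
# A hedged sketch of the two insert paths above. MonetDB's COPY INTO reads
# newline-separated records with '|'-separated fields by default, so 'records'
# for the CSV path would look like the literal below; the schema, stream, and
# values are all illustrative.
#
# self.insert_points_via_insertinto('weather', 'stations', "('north', 21.5, '2019-01-01 00:00:00')")
# self.insert_points_via_csv('weather.stations', 2, 'north|21.5\nsouth|19.0\n')
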
async def add_json_lines(lines: List[Dict[str, Any]]) -> None:
    try:
        INSERT_DATA_SCHEMA.validate(lines)
    except (ValidationError, BaseException) as ex:
        raise GuardianException(where=JSON_SCHEMA_LINE_SPLIT_VIOLATION, message=ex.__str__())
    await asyncio.wrap_future(THREAD_POOL.submit(insert_json_values, lines))
async def json_delete_stream(submitted_json: Dict[str, str]) -> None:
    try:
        DELETE_STREAMS_SCHEMA.validate(submitted_json)
    except (ValidationError, BaseException) as ex:
        raise GuardianException(where=JSON_SCHEMA_DELETE_VIOLATION, message=ex.__str__())
    await asyncio.wrap_future(THREAD_POOL.submit(delete_json_stream, submitted_json))
def get_single_database_stream(self, schema: str, stream: str) -> Dict[Any, Any]:
    """SQL injection should be prevented at the upper layer, but it is guarded against here as well."""
    try:  # TODO type='4'
        sqlt = ''.join(["""SELECT tables."id", schemas."name", tables."name" FROM""",
                        """ (SELECT "id", "name", "schema_id" FROM sys.tables WHERE tables."name"='""",
                        my_monet_escape(stream),
                        """') AS tables INNER JOIN (SELECT "id", "name" FROM sys.schemas WHERE schemas."name"='""",
                        my_monet_escape(schema),
                        """') AS schemas ON (tables."schema_id"=schemas."id") ORDER BY tables."id" """])
        self._cursor.execute(sqlt)
        table = self._cursor.fetchall()
    except BaseException as ex:
        self._connection.rollback()
        raise GuardianException(where=MAPI_CONNECTION_VIOLATION, message=ex.__str__())
    if len(table) == 0:
        raise GuardianException(where=STREAM_NOT_FOUND,
                                message='The stream %s.%s was not found in the server!' % (schema, stream))
    try:
        sqlc = ''.join(["""SELECT columns."table_id", columns."name", columns."type", columns."null",""",
                        """ columns."type_digits" FROM (SELECT "id", "table_id", "name", "type", "null",""",
                        """ "number", "type_digits" FROM sys.columns) AS columns INNER JOIN (SELECT "id" FROM""",
                        """ sys.tables WHERE tables."id"='""",
                        my_monet_escape(str(table[0][0])),
                        """') AS tables ON (tables."id"=columns."table_id") ORDER BY columns."table_id",""",
                        """ columns."number" """])
        self._cursor.execute(sqlc)
        columns = self._cursor.fetchall()
        self._connection.commit()
    except BaseException as ex:
        self._connection.rollback()
        raise GuardianException(where=MAPI_CONNECTION_VIOLATION, message=ex.__str__())
    result = OrderedDict([('schema', table[0][1]), ('stream', table[0][2]), ('columns', [])])
    for entry in columns:
        entry = list(entry)
        entry[2] = normalize_monetdb_type(entry[2])
        array_to_append = [('name', entry[1]), ('type', entry[2]), ('nullable', entry[3])]
        if entry[2] in BOUNDED_TEXT_INPUTS:  # bounded text types also report their length limit
            array_to_append.append(('limit', entry[4]))
        result['columns'].append(OrderedDict(array_to_append))
    return result
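
# The OrderedDict returned above has this shape, taken from the construction
# code; the concrete names and type strings are illustrative:
#
# {'schema': 'weather', 'stream': 'stations',
#  'columns': [{'name': 'region', 'type': 'clob', 'nullable': False},
#              {'name': 'temperature', 'type': 'real', 'nullable': True}]}
#
# Columns whose normalized type is in BOUNDED_TEXT_INPUTS also carry a 'limit' key.
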
async def discovery_influxdb_lines_slow(lines: str) -> None:
    (streams, found_errors) = use_antlr_parser(lines)
    for values in streams:
        create_stream_from_influxdb_discovery_slow(values['schema'], values['stream'],
                                                   values['values'][0], values['tags'])
    await asyncio.wrap_future(THREAD_POOL.submit(insert_json_values, streams))
    if len(found_errors):
        raise GuardianException(where=INFLUXDB_LINE_INSERT_VIOLATION, message=found_errors)
def use_antlr_parser(lines: str):
    try:
        lexer = influxdbLexer(antlr4.InputStream(lines))
        stream = antlr4.CommonTokenStream(lexer)
        parser = influxdbParser(stream)
        tree = parser.lines()
        listener = GuardianInfluxDBListener()
        walker = antlr4.ParseTreeWalker()
        walker.walk(listener, tree)
    except BaseException as ex:
        raise GuardianException(where=INFLUXDB_LINE_INSERT_VIOLATION, message=ex.__str__())
    return listener.get_parsed_values(), listener.get_found_errors()
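
# use_antlr_parser returns a (parsed_values, found_errors) tuple. Judging from
# its callers above, each entry in parsed_values carries at least 'schema',
# 'stream', 'values' (a list of column->value dicts) and 'tags'; this shape is
# inferred from the call sites, not read from GuardianInfluxDBListener itself.
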
async def mqttclient_coro():
    client = MQTTClient()
    await client.connect('mqtt://localhost:1883/')
    await client.subscribe(QOS_VALUES)
    try:
        while True:
            message = await client.deliver_message()
            packet = message.publish_packet
            topic_name = packet.variable_header.topic_name
            decoded_packet_data = packet.payload.data.decode('utf-8')
            try:
                if topic_name == INFLUXDB_TOPIC:
                    await add_influxdb_lines(decoded_packet_data)
                elif topic_name == DISCOVERY_TOPIC:
                    await discovery_influxdb_lines_fast(decoded_packet_data)
                elif topic_name == DISCOVERY_SLOW_TOPIC:
                    await discovery_influxdb_lines_slow(decoded_packet_data)
                elif topic_name == JSON_TOPIC:
                    await add_json_lines(decoded_packet_data)
                else:
                    raise GuardianException(where=MQTT_PROTOCOL_VIOLATION,
                                            message='Unknown topic %s!' % topic_name)
                await client.publish(ANSWER_TOPIC, 'k'.encode('utf-8'), qos=QOS_2)
            except GuardianException as ex:
                if isinstance(ex.message, (list, tuple)):
                    printing = '{"messages":["' + '","'.join(ex.message) + '"]}'
                else:
                    printing = '{"message":"' + ex.message + '"}'
                await client.publish(ANSWER_TOPIC, printing.encode('utf-8'), qos=QOS_2)
    except ClientException as ex:
        print("An error occurred in the Guardian MQTT client: %s" % ex.__str__())
    await client.unsubscribe(SUBSCRIPTION_TOPICS)
    await client.disconnect()
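
# Protocol note, read off the loop above: every handled message is answered on
# ANSWER_TOPIC with QoS 2, either the bare acknowledgement b'k' on success or a
# JSON error body of the form {"message":"..."} / {"messages":["...","..."]}.
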
def create_stream(self, schema: str, stream: str, columns: List[Dict[Any, Any]]) -> None:
    validated_columns = []
    primary_keys = []
    for entry in columns:
        validated_columns.append(_create_stream_sql(entry['name'], entry['type'],
                                                    entry['nullable'], entry.get('limit', None)))
        if entry['isTag']:  # every tag column is part of the primary key
            primary_keys.append(my_monet_escape(entry['name']))
    primary_keys.append(TIMESTAMP_COLUMN_NAME)
    column_sql = ','.join(validated_columns)
    column_sql += ', PRIMARY KEY (' + ','.join(primary_keys) + ')'
    try:  # TODO add STREAM table back!
        self._cursor.execute("CREATE SCHEMA IF NOT EXISTS %s" % my_monet_escape(schema))
        self._cursor.execute("CREATE TABLE %s.%s (%s)" % (my_monet_escape(schema),
                                                          my_monet_escape(stream), column_sql))
        self._connection.commit()
    except BaseException as ex:
        self._connection.rollback()
        raise GuardianException(where=MAPI_CONNECTION_VIOLATION, message=ex.__str__())
def get_database_streams(self) -> List[Dict[Any, Any]]:
    results = []
    try:  # TODO add STREAM table back! WHERE type=4
        tables_sql_string = """SELECT tables."id", schemas."name", tables."name" FROM
            (SELECT "id", "name", "schema_id" FROM sys.tables) AS tables INNER JOIN
            (SELECT "id", "name" FROM sys.schemas) AS schemas
            ON (tables."schema_id"=schemas."id") ORDER BY tables."id" """.replace('\n', ' ')
        self._cursor.execute(tables_sql_string)
        tables = self._cursor.fetchall()
        if len(tables) > 0:
            columns_sql_string = """SELECT columns."table_id", columns."name", columns."type",
                columns."null", columns."type_digits" FROM (SELECT "id", "table_id", "name",
                "type", "null", "number", "type_digits" FROM sys.columns) AS columns INNER JOIN
                (SELECT "id" FROM sys.tables) AS tables ON (tables."id"=columns."table_id")
                ORDER BY columns."table_id", columns."number" """.replace('\n', ' ')
            self._cursor.execute(columns_sql_string)
            columns = self._cursor.fetchall()
            self._connection.commit()
            grouped_columns = defaultdict(list)  # group the columns to their respective tables
            for entry in columns:
                entry = list(entry)
                entry[2] = normalize_monetdb_type(entry[2])
                array_to_append = [('name', entry[1]), ('type', entry[2]), ('nullable', entry[3])]
                if entry[2] in BOUNDED_TEXT_INPUTS:
                    array_to_append.append(('limit', entry[4]))
                grouped_columns[entry[0]].append(OrderedDict(array_to_append))
            for entry in tables:
                results.append(OrderedDict([('schema', entry[1]), ('stream', entry[2]),
                                            ('columns', grouped_columns[entry[0]])]))
    except BaseException as ex:
        self._connection.rollback()
        raise GuardianException(where=MAPI_CONNECTION_VIOLATION, message=ex.__str__())
    return results
def insert_json_values(input_json: List[Dict[str, Any]]) -> None:
    mapi_context = get_stream_cache()
    errors = []
    stream_entry_counter = 0
    for stream_entry in input_json:
        stream_entry_counter += 1
        # check if the stream exists
        next_columns = mapi_context.try_get_stream(stream_entry['schema'], stream_entry['stream'])
        if next_columns is None:
            errors.append('The stream %s.%s at line %d does not exist!'
                          % (stream_entry['schema'], stream_entry['stream'], stream_entry_counter))
            continue
        next_batch = []
        for entry in stream_entry['values']:
            insert_counter = 0
            next_inserts = ['NULL'] * len(next_columns)
            for key, value in entry.items():
                insert_counter += 1
                # check if the column exists in the stream
                found_column = None
                index = 0
                for single_column in next_columns:
                    if single_column['name'] == key:
                        found_column = single_column
                        break
                    else:
                        index += 1
                if found_column is None:
                    errors.append('The column %s does not exist in the stream %s.%s (line %d, %d)!'
                                  % (key, stream_entry['schema'], stream_entry['stream'],
                                     stream_entry_counter, insert_counter))
                    continue
                # check if the type is correct
                next_type_validation = TYPE_CHECK_DICT[found_column['type']]
                next_correct_types = next_type_validation['types']
                next_type = type(value)
                if next_type not in next_correct_types:
                    errors.append('The value for the column %s is wrong: %s not in [%s] (line %d, %d)!'
                                  % (found_column['name'], next_type.__name__,
                                     ','.join(map(lambda x: x.__name__, next_correct_types)),
                                     stream_entry_counter, insert_counter))
                    continue
                # extra validations
                next_extra_validation = next_type_validation['extra']
                if next_extra_validation is not None:
                    next_error = next_extra_validation(found_column, stream_entry_counter,
                                                       insert_counter, value)
                    if next_error != '':
                        errors.append(next_error)
                        continue
                next_inserts[index] = next_type_validation['converter'](value)
            # add the timestamp column (always the last one) if missing
            if next_inserts[-1] == 'NULL':
                next_inserts[-1] = "'" + get_default_timestamp_value() + "'"
            next_batch.append("(" + ",".join(next_inserts) + ")")
        if len(next_batch) > 0:
            mapi_context.insert_into_stream(stream_entry['schema'], stream_entry['stream'],
                                            ','.join(next_batch))
    if len(errors):
        raise GuardianException(where=JSON_SCHEMA_INSERT_VIOLATION, message=errors)
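
# A hedged example of the payload insert_json_values consumes, reconstructed
# from the lookups above; the schema, stream, column names, and values are
# illustrative.
#
# insert_json_values([
#     {'schema': 'weather', 'stream': 'stations',
#      'values': [{'region': 'north', 'temperature': 21.5},   # timestamp omitted, so
#                 {'region': 'south', 'temperature': 19.0}]}  # get_default_timestamp_value() fills it in
# ])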