class PSGrammar(Grammar):
    """Grammar for nested ``@{ ... }`` objects and ``@( ... )`` lists.

    The literal forms (``$True``/``$False``, ``@{``, ``@(``) look like
    PowerShell data syntax -- TODO confirm against the callers.

    * An object is ``@{`` followed by zero or more ``name = value``
      assignments and a closing ``}``.
    * A list is ``@(`` followed by zero or more values and a closing ``)``.
    * A value is an object, a list, a single-quoted string, a boolean or an
      unsigned integer.
    """

    # Placeholders so that ``ni_item`` can refer to LIST and OBJ before the
    # real rules are bound to those names further down.
    LIST = Ref()
    OBJ = Ref()

    # Regex patterns are raw strings so that ``\d`` / ``\w`` reach the regex
    # engine verbatim instead of being (invalid) Python string escapes.
    STRING = Regex(r"'(?:.)*?'")
    INT = Regex(r"\d+")
    BOOL = Choice(Token("$False"), Token("$True"))

    ni_item = Choice(OBJ, LIST, STRING, BOOL, INT)
    variable = Sequence(Regex(r'\w+'), Token('='), ni_item)

    LIST = Sequence('@(', Repeat(ni_item), ')')
    OBJ = Sequence('@{', Repeat(variable), '}')

    START = Choice(OBJ, LIST)
class Addb2Grammar(Grammar):
    """Grammar for a dump made of ``*`` header lines followed by ``|`` rows.

    Each record starts with ``*``, a timestamp and free-form measurement
    text (1), optionally followed by histogram rows of the form
    ``| key : value |`` (2) and by attribute rows of the form
    ``| name rest-of-line`` (3).  See the sample below the rule definitions.
    """

    # All patterns that contain a backslash are raw strings so the escapes
    # are interpreted by the regex engine, not by the Python string parser.
    r_start = Regex(r"^\*")
    r_meas_time = Regex(
        r"[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{9}")
    r_meas = Regex(".*")

    r_attr_start = Regex(r"^\|")
    r_attr_name = Regex("[A-Za-z0-9]+")
    r_attr = Optional(Regex("[^|*].*"))

    r_hist_start = Regex(r"^\|")
    # The key is optional: the first histogram row in the sample has none.
    r_hist_key = Optional(Regex("[-]{0,1}[0-9]+"))
    r_hist_sep = Regex(":")
    r_hist_val = Regex("[0-9]+")
    r_hist_end = Regex(r"\|")

    # --1
    # * 2015-04-14-15:33:11.998165453 fom-descr service: <7300000000000001:0>, sender: c28baccf27e0001
    # --2
    # |         :         0 |
    # |    1    :         0 |
    # |   ....              |
    # |   25    :         0 |
    # --3
    # | node <11186d8bf0e34117:ab1897c062a22573>
    # | ....
    # | fom @0x7f795008ed20, 'IO fom', transitions: 0, phase: 0
    # | ....
    START = Repeat(
        Sequence(
            r_start,
            Sequence(
                r_meas_time,  # 1
                r_meas),
            Optional(
                Repeat(
                    Sequence(
                        r_hist_start,  # 2
                        r_hist_key,
                        r_hist_sep,
                        r_hist_val,
                        r_hist_end))),
            Repeat(Sequence(
                r_attr_start,  # 3
                r_attr_name,
                r_attr))))
class ASMTemplateGrammar(Grammar):
    """Grammar for whitespace-sensitive templates.

    A template is a non-empty run of:

    * ``<...>`` links,
    * literal text,
    * single or multiple whitespace characters,
    * ``{ ... }`` optional groups (which may nest),
    * ``( a | b )`` bracketed alternatives, and
    * ``{ a | b }`` optional alternatives.

    Whitespace is matched explicitly by the ``space`` / ``doublespace``
    rules, which is why ``_walk`` is overridden not to strip it.
    """

    # Raw strings: ``\s`` and ``\]`` must reach the regex engine verbatim
    # rather than being treated as (invalid) Python string escapes.
    doublespace = Regex(r'\s\s+')
    space = Regex(r'\s')
    link = Regex('<[A-Za-z0-9_|()+]+>')
    text = Regex(r'[A-Za-z0-9_[\]!,#.]+')

    # ``optional`` is recursive: declare a placeholder first, then bind the
    # real rule to the same name.
    optional = Ref()
    optional = Sequence(
        '{',
        Repeat(Choice(link, text, optional, space), mi=1),
        '}')

    bracket_alternative = Sequence(
        '(',
        Repeat(Choice(link, text, space), mi=1),
        '|',
        Repeat(Choice(link, text, space), mi=1),
        ')')

    # Disabled rule, kept from the original source for reference:
    # unbracket_alternative = Sequence(Choice(link, text), mi=1), '|', Repeat(Choice(link, text), mi=1))

    optional_alternative = Sequence(
        '{',
        Repeat(Choice(link, text, space), mi=1),
        '|',
        Repeat(Choice(link, text, space), mi=1),
        '}')

    START = Repeat(
        Choice(doublespace,
               space,
               link,
               text,
               optional_alternative,
               bracket_alternative,
               optional),
        mi=1)

    def _walk(self, element, pos, tree, rule, is_required):
        """Try to match ``element`` at ``pos`` without skipping whitespace.

        Presumably a copy of the base ``Grammar._walk`` with the
        ``.lstrip()`` call removed -- verify against the installed pyleri
        version when upgrading.

        Returns whatever ``element._get_node_result`` returns for the
        remaining input.
        """
        if self._pos != pos:
            # Deliberately no ``.lstrip()`` here: don't strip whitespace.
            self._s = self._string[pos:]
            self._pos = self._len_string - len(self._s)
        node = Node(element, self._string, self._pos)
        self._expecting.set_mode_required(node.start, is_required)
        return element._get_node_result(self, tree, rule, self._s, node)
class DSLGrammar(Grammar):
    """Grammar for a block of typed field declarations.

    The accepted shape is::

        word {
            <field_type> <field_name> = <strategy>(<arg>, <arg>, ...)
            ...
        }

    with at least one field.  Arguments are numbers, double-quoted strings
    (backslash escapes allowed) or field names.
    """

    # Patterns are raw strings so backslashes are passed to the regex engine
    # unchanged; the resulting pattern text is identical to the original.
    numeric = Regex(r"[0-9]+(\.[0-9]+)?")
    quoted_string = Regex(r'("[^"\\]*(?:\\.[^"\\]*)*")')
    word = Regex(r"(?:\w+)")
    field_name = Regex(r"(?:\w+)")
    strategy = Regex(r"(?:\w+)")

    field_type = Choice(Keyword('string'),
                        Keyword('int16'),
                        Keyword('int32'),
                        Keyword('int64'),
                        Keyword('uint16'),
                        Keyword('uint32'),
                        Keyword('uint64'),
                        Keyword('ipv4'),
                        Keyword('ipv6'))

    strategy_arguments = List(Choice(numeric, quoted_string, field_name))

    field = Sequence(field_type,
                     field_name,
                     Token('='),
                     strategy,
                     Token('('),
                     strategy_arguments,
                     Token(')'))

    START = Sequence(word, Token('{'), Repeat(field, mi=1), Token('}'))
def test_repeat(self):
    """Repeat with default bounds accepts zero or more keywords."""
    hi = Keyword('hi')
    repeat = Repeat(hi)
    grammar = create_grammar(repeat)

    # Default bounds: no minimum, no maximum.
    self.assertEqual(repeat.min, 0)
    self.assertEqual(repeat.max, None)

    for accepted in ('hi hi hi', 'hi', ''):
        self.assertTrue(grammar.parse(accepted).is_valid)

    for rejected in ('hihi', 'ha,'):
        self.assertFalse(grammar.parse(rejected).is_valid)

    # Error text with the default translation ...
    default_message = grammar.parse('hi.').as_str()
    self.assertEqual(
        default_message,
        'error at position 2, expecting: end_of_statement or hi')

    # ... and with a translation that blanks out every element.
    blank_message = grammar.parse('hi.').as_str(translate=lambda elem: '')
    self.assertEqual(blank_message, 'error at position 2')
def test_repeat_all_options(self):
    """Repeat with explicit bounds enforces both minimum and maximum."""
    hi = Keyword('hi')
    repeat = Repeat(hi, mi=1, ma=3)
    grammar = create_grammar(repeat)

    self.assertEqual(repeat.min, 1)
    self.assertEqual(repeat.max, 3)

    for accepted in ('hi hi hi', 'hi'):
        self.assertTrue(grammar.parse(accepted).is_valid)

    # Too few (empty) and too many (four) repetitions are both rejected.
    for rejected in ('', 'hi hi hi hi'):
        self.assertFalse(grammar.parse(rejected).is_valid)

    too_many = grammar.parse('hi hi hi hi hi.').as_str()
    self.assertEqual(
        too_many,
        'error at position 8, expecting: end_of_statement')

    trailing_dot = grammar.parse('hi.').as_str()
    self.assertEqual(
        trailing_dot,
        'error at position 2, expecting: end_of_statement or hi')

    nothing = grammar.parse('').as_str()
    self.assertEqual(nothing, 'error at position 0, expecting: hi')
class ExplanationGrammar(Grammar):
    """Grammar for a non-empty list of operand properties.

    Each property is a keyword (``TYPE``, ``ENCODED``, ``DEFAULT``,
    ``MULTIPLE_OF``, ``EXPR`` or ``CONSTANT_VALUE``) followed by its value:
    a type keyword, a name, a number, or a simple binary expression between
    a name and a number.
    """

    # --- names -----------------------------------------------------------
    name_inner = Regex('[A-Za-z0-9:\'_#]+')
    quoted_name = Regex('"[A-Za-z0-9:\'_ #]+"')
    angle_name = Regex('<[A-Za-z0-9:\'_ #]+>')
    imm_name = Regex('#[0-9]+')
    name = Choice(quoted_name, angle_name, imm_name, name_inner)

    # --- operand types ---------------------------------------------------
    # One Keyword per type name, in the listed order.
    types = Choice(*(
        Keyword(type_name) for type_name in (
            'WREG_ZR',
            'XREG_ZR',
            'WREG_SP',
            'XREG_SP',
            'FPREG',
            'FPREG_128',
            'FPREG_64',
            'FPREG_32',
            'FPREG_16',
            'FPREG_8',
            'IMMEDIATE',
            'SIGNED_IMMEDIATE',
            'BITMASK_IMMEDIATE_32',
            'BITMASK_IMMEDIATE_64',
            'CONDITION',
            'INVERTED_CONDITION',
            'SYSREG',
            'PREFETCH_OP',
            'AT_INSTRUCTION',
            'TLBI_INSTRUCTION',
            'IC_INSTRUCTION',
            'DC_INSTRUCTION',
            'CONSTANT',
            'BARRIER_SCOPE',
        )
    ))
    type_property = Sequence(Keyword('TYPE'), types)

    # --- numbers and binary expressions ----------------------------------
    bits = Regex('\'[0-9]+\'')
    integer = Regex('[0-9]+')
    number = Choice(bits, integer)

    multiple = Sequence(name, Token('*'), number)
    division = Sequence(name, Token('/'), number)
    addition = Sequence(name, Token('+'), number)
    subtraction = Sequence(name, Token('-'), number)
    subtraction_from = Sequence(number, Token('-'), name)

    # --- properties ------------------------------------------------------
    encoded_property = Sequence(
        Keyword('ENCODED'),
        Choice(
            name,
            multiple,
            division,
            addition,
            subtraction,
            subtraction_from,
        ),
    )
    default_property = Sequence(Keyword('DEFAULT'), Choice(name, number))
    multiple_of_property = Sequence(Keyword('MULTIPLE_OF'), number)
    constant_value_property = Sequence(Keyword('CONSTANT_VALUE'), imm_name)
    expr_property = Sequence(
        Keyword('EXPR'),
        Choice(
            name,
            multiple,
            division,
            addition,
            subtraction,
            subtraction_from,
            Keyword('PRESENCE'),
        ),
    )

    prop = Choice(
        type_property,
        encoded_property,
        default_property,
        multiple_of_property,
        expr_property,
        constant_value_property,
    )

    START = Repeat(prop, mi=1)
class SiriGrammar(Grammar):
    """Query grammar built from regex, keyword and statement rules.

    The rules are declared bottom-up: regular expressions, keywords, small
    helper choices/expressions, column lists, ``where`` clauses, aggregate
    functions, ``set``/``alter``/``count``/``create``/``drop``/``list``
    building blocks, the statements themselves, the ``help`` tree, and
    finally ``START``.

    Many ``Choice`` elements pass ``most_greedy=False``; others pass
    ``most_greedy=True``.  NOTE(review): this layout looks machine-exported,
    so prefer regenerating it over hand-editing -- TODO confirm.
    """

    # Pattern used to recognise keywords in the input.
    RE_KEYWORDS = re.compile('[a-z_]+')

    # ------------------------------------------------------------------
    # Regular expressions
    # ------------------------------------------------------------------
    r_float = Regex('^[-+]?[0-9]*\\.?[0-9]+')
    r_integer = Regex('^[-+]?[0-9]+')
    r_uinteger = Regex('^[0-9]+')
    r_time_str = Regex('^[0-9]+[smhdw]')
    r_singleq_str = Regex('^(?:\'(?:[^\']*)\')+')
    r_doubleq_str = Regex('^(?:"(?:[^"]*)")+')
    r_grave_str = Regex('^(?:`(?:[^`]*)`)+')
    r_uuid_str = Regex(
        '^[0-9a-f]{8}\\-[0-9a-f]{4}\\-[0-9a-f]{4}\\-[0-9a-f]{4}\\-[0-9a-f]{12}'
    )
    # A /.../ literal with an optional trailing 'i'.
    r_regex = Regex('^(/[^/\\\\]*(?:\\\\.[^/\\\\]*)*/i?)')
    r_comment = Regex('^#.*')

    # ------------------------------------------------------------------
    # Keywords (a few are a Choice between a keyword and a symbol token)
    # ------------------------------------------------------------------
    k_access = Keyword('access')
    k_active_handles = Keyword('active_handles')
    k_address = Keyword('address')
    k_after = Keyword('after')
    k_alter = Keyword('alter')
    k_and = Keyword('and')
    k_as = Keyword('as')
    k_backup_mode = Keyword('backup_mode')
    k_before = Keyword('before')
    k_buffer_size = Keyword('buffer_size')
    k_buffer_path = Keyword('buffer_path')
    k_between = Keyword('between')
    k_count = Keyword('count')
    k_create = Keyword('create')
    k_critical = Keyword('critical')
    k_database = Keyword('database')
    k_dbname = Keyword('dbname')
    k_dbpath = Keyword('dbpath')
    k_debug = Keyword('debug')
    k_derivative = Keyword('derivative')
    k_difference = Keyword('difference')
    k_drop = Keyword('drop')
    k_drop_threshold = Keyword('drop_threshold')
    k_duration_log = Keyword('duration_log')
    k_duration_num = Keyword('duration_num')
    k_end = Keyword('end')
    k_error = Keyword('error')
    k_expression = Keyword('expression')
    k_false = Keyword('false')
    k_filter = Keyword('filter')
    k_float = Keyword('float')
    k_for = Keyword('for')
    k_from = Keyword('from')
    k_full = Keyword('full')
    k_grant = Keyword('grant')
    k_group = Keyword('group')
    k_groups = Keyword('groups')
    # 'help' may also be written as '?'.
    k_help = Choice(Keyword('help'), Token('?'), most_greedy=True)
    k_info = Keyword('info')
    k_ignore_threshold = Keyword('ignore_threshold')
    k_insert = Keyword('insert')
    k_integer = Keyword('integer')
    # 'intersection' may also be written as '&'.
    k_intersection = Choice(Token('&'),
                            Keyword('intersection'),
                            most_greedy=False)
    k_ip_support = Keyword('ip_support')
    k_length = Keyword('length')
    k_libuv = Keyword('libuv')
    k_limit = Keyword('limit')
    k_list = Keyword('list')
    k_list_limit = Keyword('list_limit')
    k_log = Keyword('log')
    k_log_level = Keyword('log_level')
    k_max = Keyword('max')
    k_max_open_files = Keyword('max_open_files')
    k_mean = Keyword('mean')
    k_median = Keyword('median')
    k_median_low = Keyword('median_low')
    k_median_high = Keyword('median_high')
    k_mem_usage = Keyword('mem_usage')
    k_merge = Keyword('merge')
    k_min = Keyword('min')
    k_modify = Keyword('modify')
    k_name = Keyword('name')
    k_now = Keyword('now')
    k_number = Keyword('number')
    k_online = Keyword('online')
    k_open_files = Keyword('open_files')
    k_or = Keyword('or')
    k_password = Keyword('password')
    k_points = Keyword('points')
    k_pool = Keyword('pool')
    k_pools = Keyword('pools')
    k_port = Keyword('port')
    k_prefix = Keyword('prefix')
    k_pvariance = Keyword('pvariance')
    k_read = Keyword('read')
    k_received_points = Keyword('received_points')
    k_reindex_progress = Keyword('reindex_progress')
    k_revoke = Keyword('revoke')
    k_select = Keyword('select')
    k_select_points_limit = Keyword('select_points_limit')
    k_series = Keyword('series')
    k_server = Keyword('server')
    k_servers = Keyword('servers')
    k_set = Keyword('set')
    k_sid = Keyword('sid')
    k_shards = Keyword('shards')
    k_show = Keyword('show')
    k_size = Keyword('size')
    k_start = Keyword('start')
    k_startup_time = Keyword('startup_time')
    k_status = Keyword('status')
    k_string = Keyword('string')
    k_suffix = Keyword('suffix')
    k_sum = Keyword('sum')
    # 'symmetric_difference' may also be written as '^'.
    k_symmetric_difference = Choice(Token('^'),
                                    Keyword('symmetric_difference'),
                                    most_greedy=False)
    k_sync_progress = Keyword('sync_progress')
    k_timeit = Keyword('timeit')
    k_timezone = Keyword('timezone')
    k_time_precision = Keyword('time_precision')
    k_to = Keyword('to')
    k_true = Keyword('true')
    k_type = Keyword('type')
    # 'union' may also be written as ',' or '|'.
    k_union = Choice(Tokens(', |'), Keyword('union'), most_greedy=False)
    k_uptime = Keyword('uptime')
    k_user = Keyword('user')
    k_users = Keyword('users')
    k_using = Keyword('using')
    k_uuid = Keyword('uuid')
    k_variance = Keyword('variance')
    k_version = Keyword('version')
    k_warning = Keyword('warning')
    k_where = Keyword('where')
    k_who_am_i = Keyword('who_am_i')
    k_write = Keyword('write')

    # ------------------------------------------------------------------
    # Helper choices and expressions
    # ------------------------------------------------------------------
    # 'difference' may also be written as '-'.
    c_difference = Choice(Token('-'), k_difference, most_greedy=False)
    access_keywords = Choice(k_read,
                             k_write,
                             k_modify,
                             k_full,
                             k_select,
                             k_show,
                             k_list,
                             k_count,
                             k_create,
                             k_insert,
                             k_drop,
                             k_grant,
                             k_revoke,
                             k_alter,
                             most_greedy=False)
    _boolean = Choice(k_true, k_false, most_greedy=False)
    log_keywords = Choice(k_debug,
                          k_info,
                          k_warning,
                          k_error,
                          k_critical,
                          most_greedy=False)
    # Integer arithmetic: a literal, a parenthesised expression, or a
    # binary operation between two expressions (THIS refers back to the
    # enclosing Prio).
    int_expr = Prio(r_integer,
                    Sequence(Token('('), THIS, Token(')')),
                    Sequence(THIS, Tokens('+ - * % /'), THIS))
    string = Choice(r_singleq_str, r_doubleq_str, most_greedy=False)
    # Same shape as int_expr, with time strings, 'now' and quoted strings
    # as additional leaves.
    time_expr = Prio(r_time_str,
                     k_now,
                     string,
                     r_integer,
                     Sequence(Token('('), THIS, Token(')')),
                     Sequence(THIS, Tokens('+ - * % /'), THIS))

    # ------------------------------------------------------------------
    # Column lists (comma separated).  The positional List arguments are
    # presumably (delimiter, mi, ma, opt) -- TODO confirm against pyleri.
    # ------------------------------------------------------------------
    series_columns = List(
        Choice(k_name,
               k_type,
               k_length,
               k_start,
               k_end,
               k_pool,
               most_greedy=False), Token(','), 1, None, False)
    shard_columns = List(
        Choice(k_sid,
               k_pool,
               k_server,
               k_size,
               k_start,
               k_end,
               k_type,
               k_status,
               most_greedy=False), Token(','), 1, None, False)
    server_columns = List(
        Choice(k_address,
               k_buffer_path,
               k_buffer_size,
               k_dbpath,
               k_ip_support,
               k_libuv,
               k_name,
               k_port,
               k_uuid,
               k_pool,
               k_version,
               k_online,
               k_startup_time,
               k_status,
               k_active_handles,
               k_log_level,
               k_max_open_files,
               k_mem_usage,
               k_open_files,
               k_received_points,
               k_reindex_progress,
               k_sync_progress,
               k_uptime,
               most_greedy=False), Token(','), 1, None, False)
    group_columns = List(
        Choice(k_expression, k_name, k_series, most_greedy=False),
        Token(','), 1, None, False)
    user_columns = List(Choice(k_name, k_access, most_greedy=False),
                        Token(','), 1, None, False)
    pool_props = Choice(k_pool, k_servers, k_series, most_greedy=False)
    pool_columns = List(pool_props, Token(','), 1, None, False)

    # ------------------------------------------------------------------
    # Comparison operators
    # ------------------------------------------------------------------
    bool_operator = Tokens('== !=')
    int_operator = Tokens('== != <= >= < >')
    str_operator = Tokens('== != <= >= !~ < > ~')

    # ------------------------------------------------------------------
    # 'where' clauses.  Each one is the keyword followed by a Prio of leaf
    # comparisons plus parenthesised, 'and' and 'or' combinations.
    # ------------------------------------------------------------------
    where_group = Sequence(
        k_where,
        Prio(
            Sequence(k_series, int_operator, int_expr),
            Sequence(Choice(k_expression, k_name, most_greedy=False),
                     str_operator, string),
            Sequence(Token('('), THIS, Token(')')),
            Sequence(THIS, k_and, THIS),
            Sequence(THIS, k_or, THIS)))
    where_pool = Sequence(
        k_where,
        Prio(Sequence(pool_props, int_operator, int_expr),
             Sequence(Token('('), THIS, Token(')')),
             Sequence(THIS, k_and, THIS),
             Sequence(THIS, k_or, THIS)))
    where_series = Sequence(
        k_where,
        Prio(
            Sequence(Choice(k_length, k_pool, most_greedy=False),
                     int_operator, int_expr),
            Sequence(k_name, str_operator, string),
            Sequence(Choice(k_start, k_end, most_greedy=False),
                     int_operator, time_expr),
            Sequence(k_type, bool_operator,
                     Choice(k_string, k_integer, k_float,
                            most_greedy=False)),
            Sequence(Token('('), THIS, Token(')')),
            Sequence(THIS, k_and, THIS),
            Sequence(THIS, k_or, THIS)))
    where_server = Sequence(
        k_where,
        Prio(
            Sequence(
                Choice(k_active_handles,
                       k_buffer_size,
                       k_port,
                       k_pool,
                       k_startup_time,
                       k_max_open_files,
                       k_mem_usage,
                       k_open_files,
                       k_received_points,
                       k_uptime,
                       most_greedy=False), int_operator, int_expr),
            Sequence(
                Choice(k_address,
                       k_buffer_path,
                       k_dbpath,
                       k_ip_support,
                       k_libuv,
                       k_name,
                       k_uuid,
                       k_version,
                       k_status,
                       k_reindex_progress,
                       k_sync_progress,
                       most_greedy=False), str_operator, string),
            Sequence(k_online, bool_operator, _boolean),
            Sequence(k_log_level, int_operator, log_keywords),
            Sequence(Token('('), THIS, Token(')')),
            Sequence(THIS, k_and, THIS),
            Sequence(THIS, k_or, THIS)))
    where_shard = Sequence(
        k_where,
        Prio(
            Sequence(Choice(k_sid, k_pool, k_size, most_greedy=False),
                     int_operator, int_expr),
            # NOTE(review): the only Choice in the where clauses that uses
            # most_greedy=True -- confirm this is intended.
            Sequence(Choice(k_server, k_status, most_greedy=True),
                     str_operator, string),
            Sequence(Choice(k_start, k_end, most_greedy=False),
                     int_operator, time_expr),
            Sequence(k_type, bool_operator,
                     Choice(k_number, k_log, most_greedy=False)),
            Sequence(Token('('), THIS, Token(')')),
            Sequence(THIS, k_and, THIS),
            Sequence(THIS, k_or, THIS)))
    where_user = Sequence(
        k_where,
        Prio(Sequence(k_name, str_operator, string),
             Sequence(k_access, int_operator, access_keywords),
             Sequence(Token('('), THIS, Token(')')),
             Sequence(THIS, k_and, THIS),
             Sequence(THIS, k_or, THIS)))

    # ------------------------------------------------------------------
    # Series selection and small clause helpers
    # ------------------------------------------------------------------
    series_sep = Choice(k_union,
                        c_difference,
                        k_intersection,
                        k_symmetric_difference,
                        most_greedy=False)
    # Repeat(x, 1, 1) presumably matches x exactly once, giving the match
    # its own named rule -- TODO confirm.
    series_name = Repeat(string, 1, 1)
    group_name = Repeat(r_grave_str, 1, 1)
    series_re = Repeat(r_regex, 1, 1)
    uuid = Choice(r_uuid_str, string, most_greedy=False)
    group_match = Repeat(r_grave_str, 1, 1)
    series_match = List(
        Choice(series_name, group_match, series_re, most_greedy=False),
        series_sep, 1, None, False)
    limit_expr = Sequence(k_limit, int_expr)
    before_expr = Sequence(k_before, time_expr)
    after_expr = Sequence(k_after, time_expr)
    between_expr = Sequence(k_between, time_expr, k_and, time_expr)
    access_expr = List(access_keywords, Token(','), 1, None, False)
    prefix_expr = Sequence(k_prefix, string)
    suffix_expr = Sequence(k_suffix, string)

    # ------------------------------------------------------------------
    # Aggregate functions
    # ------------------------------------------------------------------
    # 'points' may also be written as '*'.
    f_points = Choice(Token('*'), k_points, most_greedy=False)
    f_difference = Sequence(k_difference, Token('('), Optional(time_expr),
                            Token(')'))
    f_derivative = Sequence(k_derivative, Token('('),
                            List(time_expr, Token(','), 0, 2, False),
                            Token(')'))
    f_mean = Sequence(k_mean, Token('('), time_expr, Token(')'))
    f_median = Sequence(k_median, Token('('), time_expr, Token(')'))
    f_median_low = Sequence(k_median_low, Token('('), time_expr, Token(')'))
    f_median_high = Sequence(k_median_high, Token('('), time_expr,
                             Token(')'))
    f_sum = Sequence(k_sum, Token('('), time_expr, Token(')'))
    f_min = Sequence(k_min, Token('('), time_expr, Token(')'))
    f_max = Sequence(k_max, Token('('), time_expr, Token(')'))
    f_count = Sequence(k_count, Token('('), time_expr, Token(')'))
    f_variance = Sequence(k_variance, Token('('), time_expr, Token(')'))
    f_pvariance = Sequence(k_pvariance, Token('('), time_expr, Token(')'))
    f_filter = Sequence(k_filter, Token('('), Optional(str_operator),
                        Choice(string, r_integer, r_float, most_greedy=True),
                        Token(')'))
    f_limit = Sequence(
        k_limit, Token('('), int_expr, Token(','),
        Choice(k_mean,
               k_median,
               k_median_high,
               k_median_low,
               k_sum,
               k_min,
               k_max,
               k_count,
               k_variance,
               k_pvariance,
               most_greedy=False), Token(')'))
    # Functions are chained with '=>'.
    aggregate_functions = List(
        Choice(f_points,
               f_limit,
               f_mean,
               f_sum,
               f_median,
               f_median_low,
               f_median_high,
               f_min,
               f_max,
               f_count,
               f_variance,
               f_pvariance,
               f_difference,
               f_derivative,
               f_filter,
               most_greedy=False), Token('=>'), 1, None, False)
    select_aggregate = Sequence(aggregate_functions, Optional(prefix_expr),
                                Optional(suffix_expr))
    merge_as = Sequence(k_merge, k_as, string,
                        Optional(Sequence(k_using, aggregate_functions)))

    # ------------------------------------------------------------------
    # 'set ...' clauses
    # ------------------------------------------------------------------
    set_address = Sequence(k_set, k_address, string)
    set_backup_mode = Sequence(k_set, k_backup_mode, _boolean)
    set_drop_threshold = Sequence(k_set, k_drop_threshold, r_float)
    set_expression = Sequence(k_set, k_expression, r_regex)
    set_ignore_threshold = Sequence(k_set, k_ignore_threshold, _boolean)
    set_list_limit = Sequence(k_set, k_list_limit, r_uinteger)
    set_log_level = Sequence(k_set, k_log_level, log_keywords)
    set_name = Sequence(k_set, k_name, string)
    set_password = Sequence(k_set, k_password, string)
    set_port = Sequence(k_set, k_port, r_uinteger)
    set_select_points_limit = Sequence(k_set, k_select_points_limit,
                                       r_uinteger)
    set_timezone = Sequence(k_set, k_timezone, string)

    # ------------------------------------------------------------------
    # Statement bodies (everything after the leading verb)
    # ------------------------------------------------------------------
    alter_database = Sequence(
        k_database,
        Choice(set_drop_threshold,
               set_list_limit,
               set_select_points_limit,
               set_timezone,
               most_greedy=False))
    alter_group = Sequence(k_group, group_name,
                           Choice(set_expression, set_name,
                                  most_greedy=False))
    alter_server = Sequence(
        k_server, uuid,
        Choice(set_log_level,
               set_backup_mode,
               set_address,
               set_port,
               most_greedy=False))
    alter_servers = Sequence(k_servers, Optional(where_server),
                             set_log_level)
    alter_user = Sequence(k_user, string,
                          Choice(set_password, set_name, most_greedy=False))
    count_groups = Sequence(k_groups, Optional(where_group))
    count_pools = Sequence(k_pools, Optional(where_pool))
    count_series = Sequence(k_series, Optional(series_match),
                            Optional(where_series))
    count_servers = Sequence(k_servers, Optional(where_server))
    count_servers_received = Sequence(k_servers, k_received_points,
                                      Optional(where_server))
    count_shards = Sequence(k_shards, Optional(where_shard))
    count_shards_size = Sequence(k_shards, k_size, Optional(where_shard))
    count_users = Sequence(k_users, Optional(where_user))
    count_series_length = Sequence(k_series, k_length,
                                   Optional(series_match),
                                   Optional(where_series))
    create_group = Sequence(k_group, group_name, k_for, r_regex)
    create_user = Sequence(k_user, string, set_password)
    drop_group = Sequence(k_group, group_name)
    drop_series = Sequence(k_series, Optional(series_match),
                           Optional(where_series),
                           Optional(set_ignore_threshold))
    drop_shards = Sequence(k_shards, Optional(where_shard),
                           Optional(set_ignore_threshold))
    drop_server = Sequence(k_server, uuid)
    drop_user = Sequence(k_user, string)
    grant_user = Sequence(k_user, string, Optional(set_password))
    list_groups = Sequence(k_groups, Optional(group_columns),
                           Optional(where_group))
    list_pools = Sequence(k_pools, Optional(pool_columns),
                          Optional(where_pool))
    list_series = Sequence(k_series, Optional(series_columns),
                           Optional(series_match), Optional(where_series))
    list_servers = Sequence(k_servers, Optional(server_columns),
                            Optional(where_server))
    list_shards = Sequence(k_shards, Optional(shard_columns),
                           Optional(where_shard))
    list_users = Sequence(k_users, Optional(user_columns),
                          Optional(where_user))
    revoke_user = Sequence(k_user, string)

    # ------------------------------------------------------------------
    # Statements
    # ------------------------------------------------------------------
    alter_stmt = Sequence(
        k_alter,
        Choice(alter_user,
               alter_group,
               alter_server,
               alter_servers,
               alter_database,
               most_greedy=False))
    calc_stmt = Repeat(time_expr, 1, 1)
    # Several count_* bodies start with the same keyword (e.g. k_series,
    # k_servers, k_shards), and this Choice is most_greedy=True.
    count_stmt = Sequence(
        k_count,
        Choice(count_groups,
               count_pools,
               count_series,
               count_servers,
               count_servers_received,
               count_shards,
               count_shards_size,
               count_users,
               count_series_length,
               most_greedy=True))
    create_stmt = Sequence(k_create,
                           Choice(create_group, create_user,
                                  most_greedy=True))
    drop_stmt = Sequence(
        k_drop,
        Choice(drop_group,
               drop_series,
               drop_shards,
               drop_server,
               drop_user,
               most_greedy=False))
    grant_stmt = Sequence(k_grant, access_expr, k_to,
                          Choice(grant_user, most_greedy=False))
    list_stmt = Sequence(
        k_list,
        Choice(list_series,
               list_users,
               list_shards,
               list_groups,
               list_servers,
               list_pools,
               most_greedy=False), Optional(limit_expr))
    revoke_stmt = Sequence(k_revoke, access_expr, k_from,
                           Choice(revoke_user, most_greedy=False))
    select_stmt = Sequence(
        k_select, List(select_aggregate, Token(','), 1, None, False),
        k_from, series_match, Optional(where_series),
        Optional(
            Choice(after_expr, between_expr, before_expr,
                   most_greedy=False)), Optional(merge_as))
    show_stmt = Sequence(
        k_show,
        List(
            Choice(k_active_handles,
                   k_buffer_path,
                   k_buffer_size,
                   k_dbname,
                   k_dbpath,
                   k_drop_threshold,
                   k_duration_log,
                   k_duration_num,
                   k_ip_support,
                   k_libuv,
                   k_list_limit,
                   k_log_level,
                   k_max_open_files,
                   k_mem_usage,
                   k_open_files,
                   k_pool,
                   k_received_points,
                   k_reindex_progress,
                   k_select_points_limit,
                   k_server,
                   k_startup_time,
                   k_status,
                   k_sync_progress,
                   k_time_precision,
                   k_timezone,
                   k_uptime,
                   k_uuid,
                   k_version,
                   k_who_am_i,
                   most_greedy=False), Token(','), 0, None, False))
    timeit_stmt = Repeat(k_timeit, 1, 1)

    # ------------------------------------------------------------------
    # 'help' topics.  Each topic gets its own Keyword instance even when
    # the same word already exists as a k_* keyword above.
    # ------------------------------------------------------------------
    help_select = Keyword('select')
    help_grant = Keyword('grant')
    help_create_group = Keyword('group')
    help_create_user = Keyword('user')
    help_create = Sequence(
        k_create,
        Optional(Choice(help_create_group, help_create_user,
                        most_greedy=True)))
    help_functions = Keyword('functions')
    help_timezones = Keyword('timezones')
    help_drop_group = Keyword('group')
    help_drop_series = Keyword('series')
    help_drop_shards = Keyword('shards')
    help_drop_server = Keyword('server')
    help_drop_user = Keyword('user')
    help_drop = Sequence(
        k_drop,
        Optional(
            Choice(help_drop_group,
                   help_drop_series,
                   help_drop_shards,
                   help_drop_server,
                   help_drop_user,
                   most_greedy=True)))
    help_access = Keyword('access')
    help_list_series = Keyword('series')
    help_list_servers = Keyword('servers')
    help_list_pools = Keyword('pools')
    help_list_users = Keyword('users')
    help_list_groups = Keyword('groups')
    help_list_shards = Keyword('shards')
    help_list = Sequence(
        k_list,
        Optional(
            Choice(help_list_series,
                   help_list_servers,
                   help_list_pools,
                   help_list_users,
                   help_list_groups,
                   help_list_shards,
                   most_greedy=True)))
    help_alter_user = Keyword('user')
    help_alter_database = Keyword('database')
    help_alter_server = Keyword('server')
    help_alter_group = Keyword('group')
    help_alter_servers = Keyword('servers')
    help_alter = Sequence(
        k_alter,
        Optional(
            Choice(help_alter_user,
                   help_alter_database,
                   help_alter_server,
                   help_alter_group,
                   help_alter_servers,
                   most_greedy=True)))
    help_count_groups = Keyword('groups')
    help_count_pools = Keyword('pools')
    help_count_users = Keyword('users')
    help_count_shards = Keyword('shards')
    help_count_series = Keyword('series')
    help_count_servers = Keyword('servers')
    help_count = Sequence(
        k_count,
        Optional(
            Choice(help_count_groups,
                   help_count_pools,
                   help_count_users,
                   help_count_shards,
                   help_count_series,
                   help_count_servers,
                   most_greedy=True)))
    help_noaccess = Keyword('noaccess')
    help_timeit = Keyword('timeit')
    help_show = Keyword('show')
    help_revoke = Keyword('revoke')
    # The attribute name 'help' shadows the builtin inside this class body
    # only.
    help = Sequence(
        k_help,
        Optional(
            Choice(help_select,
                   help_grant,
                   help_create,
                   help_functions,
                   help_timezones,
                   help_drop,
                   help_access,
                   help_list,
                   help_alter,
                   help_count,
                   help_noaccess,
                   help_timeit,
                   help_show,
                   help_revoke,
                   most_greedy=True)))

    # ------------------------------------------------------------------
    # Entry point: optional 'timeit', optional statement, optional comment.
    # Every part is optional, so an empty input is accepted as well.
    # ------------------------------------------------------------------
    START = Sequence(
        Optional(timeit_stmt),
        Optional(
            Choice(select_stmt,
                   list_stmt,
                   count_stmt,
                   alter_stmt,
                   create_stmt,
                   drop_stmt,
                   grant_stmt,
                   revoke_stmt,
                   show_stmt,
                   calc_stmt,
                   help,
                   most_greedy=False)), Optional(r_comment))
from pyleri import Repeat
from parser import _regexes as reg

# One or more occurrences of the project's ``Number`` element, no upper bound.
Numbers = Repeat(reg.Number, mi=1, ma=None)

# One or more occurrences of the project's ``Letter`` element, no upper bound.
TagkeysFunction = Repeat(reg.Letter, mi=1, ma=None)
class LangDef(Grammar):
    """Grammar for a small expression language.

    A program is optional comments followed by a ``;``-separated list of
    statements.  A statement is a ``Prio`` of ``expression`` and
    ``operations``; every ``THIS`` below sits in a rule that is reachable
    from that ``Prio``, so nested statements are written as ``THIS``.

    ``RE_NAME`` is defined outside this class.
    """

    # Names and keywords share the same pattern.
    RE_KEYWORDS = re.compile(RE_NAME)

    # ------------------------------------------------------------------
    # Opening tokens.  Several rules share the same character ('[', '{',
    # '(') but get their own named token.
    # ------------------------------------------------------------------
    x_array = Token('[')
    x_assign = Tokens('= += -= *= /= %= &= ^= |=')
    x_block = Token('{')
    x_chain = Token('.')
    x_closure = Token('|')
    x_function = Token('(')
    x_index = Token('[')
    x_parenthesis = Token('(')
    # Zero or more prefix operators: '!' or a sign that is not directly
    # followed by a digit (a sign followed by a digit is left for the
    # number patterns, which accept a leading sign themselves).
    x_preopr = Regex(r'(\s*!|\s*[\-+](?=[^0-9]))*')
    x_ternary = Token('?')
    x_thing = Token('{')

    # ------------------------------------------------------------------
    # Literals
    # ------------------------------------------------------------------
    r_single_quote = Regex(r"(?:'(?:[^']*)')+")
    r_double_quote = Regex(r'(?:"(?:[^"]*)")+')
    # Back-tick template: literal text (with doubled `` {{ }} accepted as
    # part of the text) mixed with {statement} parts.
    template = Sequence(
        '`',
        Repeat(
            Choice(Regex(r"([^`{}]|``|{{|}})+"), Sequence('{', THIS, '}'))),
        '`')
    thing_by_id = Regex(r'#[0-9]+')
    t_false = Keyword('false')
    t_float = Regex(
        r'[-+]?((inf|nan)([^0-9A-Za-z_]|$)|[0-9]*\.[0-9]+(e[+-][0-9]+)?)')
    # NOTE(review): the octal branch accepts the digit 8 (`0o[0-8]+`);
    # confirm whether `[0-7]` was intended before changing it.
    t_int = Regex(r'[-+]?((0b[01]+)|(0o[0-8]+)|(0x[0-9a-fA-F]+)|([0-9]+))')
    t_nil = Keyword('nil')
    t_regex = Regex('/[^/\\\\]+(?:\\\\.[^/\\\\]*)*/i?')
    t_string = Choice(r_single_quote, r_double_quote)
    t_true = Keyword('true')

    comments = Repeat(
        Choice(
            Regex(r'(?s)//.*?(\r?\n|$)'),  # Single line comment
            Regex(r'(?s)/\*.*?\*/'),  # Block comment
        ))

    name = Regex(RE_NAME)
    var = Regex(RE_NAME)

    # Placeholder: the real ``chain`` rule is bound further down and refers
    # to itself through this reference.
    chain = Ref()

    # ------------------------------------------------------------------
    # Compound values
    # ------------------------------------------------------------------
    t_closure = Sequence(x_closure, List(var), '|', THIS)

    thing = Sequence(x_thing, List(Sequence(name, ':', THIS)), '}')
    array = Sequence(x_array, List(THIS), ']')
    function = Sequence(x_function, List(THIS), ')')
    instance = Repeat(thing, mi=1, ma=1)  # will be exported as `cleri_dup_t`
    enum_ = Sequence(x_thing, Choice(name, t_closure), '}')

    # ------------------------------------------------------------------
    # Binary operators; the names carry a number from opr0 to opr8.
    # ------------------------------------------------------------------
    opr0_mul_div_mod = Tokens('* / %')
    opr1_add_sub = Tokens('+ -')
    opr2_bitwise_and = Tokens('&')
    opr3_bitwise_xor = Tokens('^')
    opr4_bitwise_or = Tokens('|')
    opr5_compare = Tokens('< > == != <= >=')
    opr6_cmp_and = Token('&&')
    opr7_cmp_or = Token('||')
    opr8_ternary = Sequence(x_ternary, THIS, ':')

    operations = Sequence(
        THIS,
        Choice(
            # make sure `ternary`, `and` and `or` is on top so we can stop
            # at the first match
            opr8_ternary,
            opr7_cmp_or,
            opr6_cmp_and,
            opr5_compare,
            opr4_bitwise_or,
            opr3_bitwise_xor,
            opr2_bitwise_and,
            opr1_add_sub,
            opr0_mul_div_mod,
        ),
        THIS)

    # ------------------------------------------------------------------
    # Names, variables, indexing and chaining
    # ------------------------------------------------------------------
    assign = Sequence(x_assign, THIS)

    name_opt_more = Sequence(name, Optional(Choice(function, assign)))

    var_opt_more = Sequence(
        var, Optional(Choice(function, assign, instance, enum_)))

    # note: slice is also used for a simple index
    slice = List(Optional(THIS), delimiter=':', ma=3, opt=False)

    index = Repeat(
        Sequence(x_index, slice, ']', Optional(Sequence(x_assign, THIS))))

    # `.name`, optionally called or assigned, optionally indexed, and
    # optionally followed by another chain.
    chain = Sequence(
        x_chain,
        name_opt_more,
        index,
        Optional(chain),
    )

    block = Sequence(x_block, comments,
                     List(THIS, delimiter=Sequence(';', comments), mi=1),
                     '}')

    parenthesis = Sequence(x_parenthesis, THIS, ')')

    # ------------------------------------------------------------------
    # Expressions and statements
    # ------------------------------------------------------------------
    expression = Sequence(
        x_preopr,
        Choice(
            chain,
            thing_by_id,
            # start immutable values
            t_false,
            t_nil,
            t_true,
            t_float,
            t_int,
            t_string,
            t_regex,
            t_closure,
            # end immutable values
            template,
            var_opt_more,
            thing,
            array,
            block,
            parenthesis,
        ),
        index,
        Optional(chain),
    )

    statement = Prio(expression, operations)
    statements = List(statement, delimiter=Sequence(';', comments))

    START = Sequence(comments, statements)
class SiriGrammar(Grammar):
    '''
    SiriDB grammar.

    Note: choices can be optimized using most_greedy=False when there is
          a preferable order in choices. This should only be used when
          the parser faces no conflict in making a decision (e.g. it
          should not be used when two choices start with the same
          keyword, because in that case we usually want the most greedy
          one).
    '''

    RE_KEYWORDS = re.compile('[a-z_]+')

    # Regular expressions
    # All patterns are raw strings so that backslashes reach the regex
    # engine unchanged and no invalid string escapes are involved.
    r_float = Regex(r'[-+]?[0-9]*\.?[0-9]+')
    r_integer = Regex(r'[-+]?[0-9]+')
    r_uinteger = Regex(r'[0-9]+')
    r_time_str = Regex(r'[0-9]+[smhdw]')
    r_singleq_str = Regex(r"(?:'(?:[^']*)')+")
    r_doubleq_str = Regex(r'(?:"(?:[^"]*)")+')
    r_grave_str = Regex(r'(?:`(?:[^`]*)`)+')
    r_uuid_str = Regex(
        r'[0-9a-f]{8}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{12}')
    # we only allow an optional 'i' for case-insensitive regex
    r_regex = Regex(r'(/[^/\\]*(?:\\.[^/\\]*)*/i?)')
    r_comment = Regex(r'#.*')

    # Keywords
    k_access = Keyword('access')
    k_active_handles = Keyword('active_handles')
    k_address = Keyword('address')
    k_after = Keyword('after')
    k_alter = Keyword('alter')
    k_and = Keyword('and')
    k_as = Keyword('as')
    k_backup_mode = Keyword('backup_mode')
    k_before = Keyword('before')
    k_buffer_size = Keyword('buffer_size')
    k_buffer_path = Keyword('buffer_path')
    k_between = Keyword('between')
    k_count = Keyword('count')
    k_create = Keyword('create')
    k_critical = Keyword('critical')
    k_database = Keyword('database')
    k_dbname = Keyword('dbname')
    k_dbpath = Keyword('dbpath')
    k_debug = Keyword('debug')
    k_derivative = Keyword('derivative')
    k_difference = Keyword('difference')
    k_drop = Keyword('drop')
    k_drop_threshold = Keyword('drop_threshold')
    k_duration_log = Keyword('duration_log')
    k_duration_num = Keyword('duration_num')
    k_end = Keyword('end')
    k_error = Keyword('error')
    k_expression = Keyword('expression')
    k_false = Keyword('false')
    k_filter = Keyword('filter')
    k_float = Keyword('float')
    k_for = Keyword('for')
    k_from = Keyword('from')
    k_full = Keyword('full')
    k_grant = Keyword('grant')
    k_group = Keyword('group')
    k_groups = Keyword('groups')
    k_help = Choice(Keyword('help'), Token('?'))
    k_info = Keyword('info')
    k_ignore_threshold = Keyword('ignore_threshold')
    k_insert = Keyword('insert')
    k_integer = Keyword('integer')
    k_intersection = Choice(
        Token('&'),
        Keyword('intersection'),
        most_greedy=False)
    k_ip_support = Keyword('ip_support')
    k_length = Keyword('length')
    k_libuv = Keyword('libuv')
    k_limit = Keyword('limit')
    k_list = Keyword('list')
    k_log = Keyword('log')
    k_log_level = Keyword('log_level')
    k_max = Keyword('max')
    k_max_open_files = Keyword('max_open_files')
    k_mean = Keyword('mean')
    k_median = Keyword('median')
    k_median_low = Keyword('median_low')
    k_median_high = Keyword('median_high')
    k_mem_usage = Keyword('mem_usage')
    k_merge = Keyword('merge')
    k_min = Keyword('min')
    k_modify = Keyword('modify')
    k_name = Keyword('name')
    k_now = Keyword('now')
    k_number = Keyword('number')
    k_online = Keyword('online')
    k_open_files = Keyword('open_files')
    k_or = Keyword('or')
    k_password = Keyword('password')
    k_points = Keyword('points')
    k_pool = Keyword('pool')
    k_pools = Keyword('pools')
    k_port = Keyword('port')
    k_prefix = Keyword('prefix')
    k_pvariance = Keyword('pvariance')
    k_read = Keyword('read')
    k_received_points = Keyword('received_points')
    k_reindex_progress = Keyword('reindex_progress')
    k_revoke = Keyword('revoke')
    k_select = Keyword('select')
    k_series = Keyword('series')
    k_server = Keyword('server')
    k_servers = Keyword('servers')
    k_set = Keyword('set')
    k_sid = Keyword('sid')
    k_shards = Keyword('shards')
    k_show = Keyword('show')
    k_size = Keyword('size')
    k_start = Keyword('start')
    k_startup_time = Keyword('startup_time')
    k_status = Keyword('status')
    k_string = Keyword('string')
    k_suffix = Keyword('suffix')
    k_sum = Keyword('sum')
    k_symmetric_difference = Choice(
        Token('^'),
        Keyword('symmetric_difference'),
        most_greedy=False)
    k_sync_progress = Keyword('sync_progress')
    k_timeit = Keyword('timeit')
    k_timezone = Keyword('timezone')
    k_time_precision = Keyword('time_precision')
    k_to = Keyword('to')
    k_true = Keyword('true')
    k_type = Keyword('type')
    k_union = Choice(Tokens(', |'), Keyword('union'), most_greedy=False)
    k_uptime = Keyword('uptime')
    k_user = Keyword('user')
    k_users = Keyword('users')
    k_using = Keyword('using')
    k_uuid = Keyword('uuid')
    k_variance = Keyword('variance')
    k_version = Keyword('version')
    k_warning = Keyword('warning')
    k_where = Keyword('where')
    k_who_am_i = Keyword('who_am_i')
    k_write = Keyword('write')

    c_difference = Choice(Token('-'), k_difference, most_greedy=False)

    access_keywords = Choice(
        k_read,
        k_write,
        k_modify,
        k_full,
        k_select,
        k_show,
        k_list,
        k_count,
        k_create,
        k_insert,
        k_drop,
        k_grant,
        k_revoke,
        k_alter,
        most_greedy=False)

    _boolean = Choice(k_true, k_false, most_greedy=False)

    log_keywords = Choice(
        k_debug,
        k_info,
        k_warning,
        k_error,
        k_critical,
        most_greedy=False)

    # Integer arithmetic: a literal, a parenthesised expression, or a
    # binary operation on two expressions.
    int_expr = Prio(
        r_integer,
        Sequence('(', THIS, ')'),
        Sequence(THIS, Tokens('+ - * % /'), THIS))

    string = Choice(r_singleq_str, r_doubleq_str, most_greedy=False)

    # Time arithmetic: same shape as `int_expr`, with time strings,
    # `now` and quoted strings as additional operands.
    time_expr = Prio(
        r_time_str,
        k_now,
        string,
        r_integer,
        Sequence('(', THIS, ')'),
        Sequence(THIS, Tokens('+ - * % /'), THIS))

    # Column lists used by the `list ...` statements.
    series_columns = List(
        Choice(
            k_name,
            k_type,
            k_length,
            k_start,
            k_end,
            k_pool,
            most_greedy=False),
        ',', 1)

    shard_columns = List(
        Choice(
            k_sid,
            k_pool,
            k_server,
            k_size,
            k_start,
            k_end,
            k_type,
            k_status,
            most_greedy=False),
        ',', 1)

    server_columns = List(
        Choice(
            # Local properties
            k_address,
            k_buffer_path,
            k_buffer_size,
            k_dbpath,
            k_ip_support,
            k_libuv,
            k_name,
            k_port,
            k_uuid,
            k_pool,
            k_version,
            k_online,
            k_startup_time,
            k_status,
            # Remote properties
            k_active_handles,
            k_log_level,
            k_max_open_files,
            k_mem_usage,
            k_open_files,
            k_received_points,
            k_reindex_progress,
            k_sync_progress,
            k_uptime,
            most_greedy=False),
        ',', 1)

    group_columns = List(
        Choice(k_expression, k_name, k_series, most_greedy=False),
        ',', 1)

    user_columns = List(
        Choice(k_name, k_access, most_greedy=False),
        ',', 1)

    pool_props = Choice(k_pool, k_servers, k_series, most_greedy=False)

    pool_columns = List(pool_props, ',', 1)

    bool_operator = Tokens('== !=')
    int_operator = Tokens('< > == != <= >=')
    str_operator = Tokens('< > == != <= >= ~ !~')

    # where group
    where_group = Sequence(
        k_where,
        Prio(
            Sequence(k_series, int_operator, int_expr),
            Sequence(
                Choice(k_expression, k_name, most_greedy=False),
                str_operator,
                string),
            Sequence('(', THIS, ')'),
            Sequence(THIS, k_and, THIS),
            Sequence(THIS, k_or, THIS)))

    # where pool
    where_pool = Sequence(
        k_where,
        Prio(
            Sequence(pool_props, int_operator, int_expr),
            Sequence('(', THIS, ')'),
            Sequence(THIS, k_and, THIS),
            Sequence(THIS, k_or, THIS)))

    # where series
    where_series = Sequence(
        k_where,
        Prio(
            Sequence(
                Choice(k_length, k_pool, most_greedy=False),
                int_operator,
                int_expr),
            Sequence(k_name, str_operator, string),
            Sequence(
                Choice(k_start, k_end, most_greedy=False),
                int_operator,
                time_expr),
            Sequence(
                k_type,
                bool_operator,
                Choice(k_string, k_integer, k_float, most_greedy=False)),
            Sequence('(', THIS, ')'),
            Sequence(THIS, k_and, THIS),
            Sequence(THIS, k_or, THIS)))

    # where server
    where_server = Sequence(
        k_where,
        Prio(
            Sequence(
                Choice(
                    k_active_handles,
                    k_buffer_size,
                    k_port,
                    k_pool,
                    k_startup_time,
                    k_max_open_files,
                    k_mem_usage,
                    k_open_files,
                    k_received_points,
                    k_uptime,
                    most_greedy=False),
                int_operator,
                int_expr),
            Sequence(
                Choice(
                    k_address,
                    k_buffer_path,
                    k_dbpath,
                    k_ip_support,
                    k_libuv,
                    k_name,
                    k_uuid,
                    k_version,
                    k_status,
                    k_reindex_progress,
                    k_sync_progress,
                    most_greedy=False),
                str_operator,
                string),
            Sequence(k_online, bool_operator, _boolean),
            Sequence(k_log_level, int_operator, log_keywords),
            Sequence('(', THIS, ')'),
            Sequence(THIS, k_and, THIS),
            Sequence(THIS, k_or, THIS)))

    # where shard
    where_shard = Sequence(
        k_where,
        Prio(
            Sequence(
                Choice(k_sid, k_pool, k_size, most_greedy=False),
                int_operator,
                int_expr),
            Sequence(Choice(k_server, k_status), str_operator, string),
            Sequence(
                Choice(k_start, k_end, most_greedy=False),
                int_operator,
                time_expr),
            Sequence(
                k_type,
                bool_operator,
                Choice(k_number, k_log, most_greedy=False)),
            Sequence('(', THIS, ')'),
            Sequence(THIS, k_and, THIS),
            Sequence(THIS, k_or, THIS)))

    # where user
    where_user = Sequence(
        k_where,
        Prio(
            Sequence(k_name, str_operator, string),
            Sequence(k_access, int_operator, access_keywords),
            Sequence('(', THIS, ')'),
            Sequence(THIS, k_and, THIS),
            Sequence(THIS, k_or, THIS)))

    series_sep = Choice(
        k_union,
        c_difference,
        k_intersection,
        k_symmetric_difference,
        most_greedy=False)

    series_name = Repeat(string, 1, 1)
    group_name = Repeat(r_grave_str, 1, 1)
    series_re = Repeat(r_regex, 1, 1)
    uuid = Choice(r_uuid_str, string, most_greedy=False)
    group_match = Repeat(r_grave_str, 1, 1)
    series_match = List(
        Choice(series_name, group_match, series_re, most_greedy=False),
        series_sep, 1)

    limit_expr = Sequence(k_limit, int_expr)

    before_expr = Sequence(k_before, time_expr)
    after_expr = Sequence(k_after, time_expr)
    between_expr = Sequence(k_between, time_expr, k_and, time_expr)
    access_expr = List(access_keywords, ',', 1)

    prefix_expr = Sequence(k_prefix, string)
    suffix_expr = Sequence(k_suffix, string)

    # Aggregation / selection functions usable in a select statement.
    f_points = Choice(Token('*'), k_points, most_greedy=False)
    f_difference = Sequence(k_difference, '(', Optional(time_expr), ')')
    f_derivative = Sequence(
        k_derivative, '(', List(time_expr, ',', 0, 2), ')')
    f_mean = Sequence(k_mean, '(', time_expr, ')')
    f_median = Sequence(k_median, '(', time_expr, ')')
    f_median_low = Sequence(k_median_low, '(', time_expr, ')')
    f_median_high = Sequence(k_median_high, '(', time_expr, ')')
    f_sum = Sequence(k_sum, '(', time_expr, ')')
    f_min = Sequence(k_min, '(', time_expr, ')')
    f_max = Sequence(k_max, '(', time_expr, ')')
    f_count = Sequence(k_count, '(', time_expr, ')')
    f_variance = Sequence(k_variance, '(', time_expr, ')')
    f_pvariance = Sequence(k_pvariance, '(', time_expr, ')')
    # most_greedy=True here: the integer and float patterns can both
    # match the start of the same input.
    f_filter = Sequence(
        k_filter,
        '(',
        Optional(str_operator),
        Choice(string, r_integer, r_float, most_greedy=True),
        ')')
    f_limit = Sequence(
        k_limit,
        '(',
        int_expr,
        ',',
        Choice(
            k_mean,
            k_median,
            k_median_high,
            k_median_low,
            k_sum,
            k_min,
            k_max,
            k_count,
            k_variance,
            k_pvariance,
            most_greedy=False),
        ')')

    aggregate_functions = List(
        Choice(
            f_points,
            f_limit,
            f_mean,
            f_sum,
            f_median,
            f_median_low,
            f_median_high,
            f_min,
            f_max,
            f_count,
            f_variance,
            f_pvariance,
            f_difference,
            f_derivative,
            f_filter,
            most_greedy=False),
        '=>', 1)

    select_aggregate = Sequence(
        aggregate_functions,
        Optional(prefix_expr),
        Optional(suffix_expr))

    merge_as = Sequence(
        k_merge,
        k_as,
        string,
        Optional(Sequence(k_using, aggregate_functions)))

    set_address = Sequence(k_set, k_address, string)
    set_backup_mode = Sequence(k_set, k_backup_mode, _boolean)
    set_drop_threshold = Sequence(k_set, k_drop_threshold, r_float)
    set_expression = Sequence(k_set, k_expression, r_regex)
    set_ignore_threshold = Sequence(k_set, k_ignore_threshold, _boolean)
    set_log_level = Sequence(k_set, k_log_level, log_keywords)
    set_name = Sequence(k_set, k_name, string)
    set_password = Sequence(k_set, k_password, string)
    set_port = Sequence(k_set, k_port, r_uinteger)
    set_timezone = Sequence(k_set, k_timezone, string)

    alter_database = Sequence(
        k_database,
        Choice(set_drop_threshold, set_timezone, most_greedy=False))

    alter_group = Sequence(
        k_group,
        group_name,
        Choice(set_expression, set_name, most_greedy=False))

    alter_server = Sequence(
        k_server,
        uuid,
        Choice(
            set_log_level,
            set_backup_mode,
            set_address,
            set_port,
            most_greedy=False))

    alter_servers = Sequence(k_servers, Optional(where_server), set_log_level)

    alter_user = Sequence(
        k_user,
        string,
        Choice(set_password, set_name, most_greedy=False))

    count_groups = Sequence(k_groups, Optional(where_group))

    count_pools = Sequence(k_pools, Optional(where_pool))

    count_series = Sequence(
        k_series,
        Optional(series_match),
        Optional(where_series))

    count_servers = Sequence(k_servers, Optional(where_server))

    count_servers_received = Sequence(
        k_servers,
        k_received_points,
        Optional(where_server))

    count_shards = Sequence(k_shards, Optional(where_shard))

    count_shards_size = Sequence(k_shards, k_size, Optional(where_shard))

    count_users = Sequence(k_users, Optional(where_user))

    count_series_length = Sequence(
        k_series,
        k_length,
        Optional(series_match),
        Optional(where_series))

    create_group = Sequence(k_group, group_name, k_for, r_regex)

    create_user = Sequence(k_user, string, set_password)

    drop_group = Sequence(k_group, group_name)

    # Drop statement needs at least a series_match or where STMT or both
    drop_series = Sequence(
        k_series,
        Optional(series_match),
        Optional(where_series),
        Optional(set_ignore_threshold))

    drop_shards = Sequence(
        k_shards,
        Optional(where_shard),
        Optional(set_ignore_threshold))

    drop_server = Sequence(k_server, uuid)

    drop_user = Sequence(k_user, string)

    grant_user = Sequence(k_user, string, Optional(set_password))

    list_groups = Sequence(
        k_groups,
        Optional(group_columns),
        Optional(where_group))

    list_pools = Sequence(
        k_pools,
        Optional(pool_columns),
        Optional(where_pool))

    list_series = Sequence(
        k_series,
        Optional(series_columns),
        Optional(series_match),
        Optional(where_series))

    list_servers = Sequence(
        k_servers,
        Optional(server_columns),
        Optional(where_server))

    list_shards = Sequence(
        k_shards,
        Optional(shard_columns),
        Optional(where_shard))

    list_users = Sequence(
        k_users,
        Optional(user_columns),
        Optional(where_user))

    revoke_user = Sequence(k_user, string)

    alter_stmt = Sequence(
        k_alter,
        Choice(
            alter_user,
            alter_group,
            alter_server,
            alter_servers,
            alter_database,
            most_greedy=False))

    calc_stmt = Repeat(time_expr, 1, 1)

    # most_greedy=True here: several count variants start with the same
    # keyword (e.g. `servers` / `servers received_points`).
    count_stmt = Sequence(
        k_count,
        Choice(
            count_groups,
            count_pools,
            count_series,
            count_servers,
            count_servers_received,
            count_shards,
            count_shards_size,
            count_users,
            count_series_length,
            most_greedy=True))

    create_stmt = Sequence(k_create, Choice(create_group, create_user))

    drop_stmt = Sequence(
        k_drop,
        Choice(
            drop_group,
            drop_series,
            drop_shards,
            drop_server,
            drop_user,
            most_greedy=False))

    grant_stmt = Sequence(
        k_grant,
        access_expr,
        k_to,
        Choice(grant_user, most_greedy=False))

    list_stmt = Sequence(
        k_list,
        Choice(
            list_series,
            list_users,
            list_shards,
            list_groups,
            list_servers,
            list_pools,
            most_greedy=False),
        Optional(limit_expr))

    revoke_stmt = Sequence(
        k_revoke,
        access_expr,
        k_from,
        Choice(revoke_user, most_greedy=False))

    select_stmt = Sequence(
        k_select,
        List(select_aggregate, ',', 1),
        k_from,
        series_match,
        Optional(where_series),
        Optional(
            Choice(
                after_expr,
                between_expr,
                before_expr,
                most_greedy=False)),
        Optional(merge_as))

    show_stmt = Sequence(
        k_show,
        List(
            Choice(
                k_active_handles,
                k_buffer_path,
                k_buffer_size,
                k_dbname,
                k_dbpath,
                k_drop_threshold,
                k_duration_log,
                k_duration_num,
                k_ip_support,
                k_libuv,
                k_log_level,
                k_max_open_files,
                k_mem_usage,
                k_open_files,
                k_pool,
                k_received_points,
                k_reindex_progress,
                k_server,
                k_startup_time,
                k_status,
                k_sync_progress,
                k_time_precision,
                k_timezone,
                k_uptime,
                k_uuid,
                k_version,
                k_who_am_i,
                most_greedy=False),
            ',', 0))

    timeit_stmt = Repeat(k_timeit, 1, 1)

    # NOTE(review): no START element is defined in this class as shown;
    # presumably the grammar cannot be instantiated without one - confirm
    # whether it is supposed to be declared here.
class MyGrammar(Grammar):
    """Grammar matching two or more `hi "pyleri"` / `bye "pyleri"` pairs.

    Keywords are matched with a custom ``RE_KEYWORDS`` pattern that
    treats any run of non-whitespace characters as a keyword candidate;
    that is what lets the quoted text ``"pyleri"`` be declared as a
    ``Keyword``.
    """

    # Raw string: `\S` is a regex escape, not a Python string escape.
    RE_KEYWORDS = re.compile(r'\S+')

    r_name = Keyword('"pyleri"')
    k_hi = Keyword('hi')
    k_bye = Keyword('bye')

    # At least two greeting/name pairs are required (mi=2).
    START = Repeat(Sequence(Choice(k_hi, k_bye), r_name), mi=2)
class MyGrammar(Grammar):
    """Grammar matching a repetition of `hi` or `bye` followed by a name.

    The name is a double-quoted string; a doubled quote character
    continues the string.
    """

    # Double-quoted name.
    r_name = Regex('(?:"(?:[^"]*)")+')

    # Greeting keywords.
    k_hi = Keyword('hi')
    k_bye = Keyword('bye')

    # Any number of greeting/name pairs.
    START = Repeat(
        Sequence(
            Choice(k_hi, k_bye),
            r_name,
        )
    )