def test_graphite_glob_parser(self):
    """Check GraphiteGlobParser.parse() and is_fully_defined() on sample globs.

    Each scenario is a triple: the glob to parse, the components expected
    from parse(), and the value expected from is_fully_defined() on them.
    """
    scenarii = [
        # Positive examples
        ("a.b", [['a'], ['b']], True),
        ("a.{b}", [['a'], ['b']], True),
        ("a?b.c", [['a', bg_glob.AnyChar(), 'b'], ['c']], False),
        ("a.b*c", [['a'], ['b', bg_glob.AnySequence(), 'c']], False),
        ("a.b**c", [['a'], ['b'], bg_glob.Globstar(), ['c']], False),
        ("a.**.c", [['a'], bg_glob.Globstar(), ['c']], False),
        ("a.**", [['a'], bg_glob.Globstar()], False),
        ("a[xyz].b", [['a', bg_glob.CharIn(['x', 'y', 'z'])], ['b']], False),
        ("a[!rat].b", [['a', bg_glob.CharNotIn(['r', 'a', 't'])], ['b']], False),
        ("pl[a-ox]p", [['pl', bg_glob.CharIn(['a-o', 'x']), 'p']], False),
        ("a[b-dopx-z]b.c",
         [['a', bg_glob.CharIn(['b-d', 'o', 'p', 'x-z']), 'b'], ['c']],
         False),
        ("b[i\\]m", [['b', bg_glob.CharIn(['\\', 'i']), 'm']], False),
        ("a[x-xy]b", [['a', bg_glob.CharIn(['x-x', 'y']), 'b']], False),
        ("a[y-xz]b", [['a', bg_glob.CharIn(['y-x', 'z']), 'b']], False),
        ("a.b.{c,d}", [['a'], ['b'], [bg_glob.SequenceIn(['c', 'd'])]], False),
        ("a.b.{c,d}-{e,f}", [['a'], ['b'], [
            bg_glob.SequenceIn(['c', 'd']),
            '-',
            bg_glob.SequenceIn(['e', 'f'])
        ]], False),
        ("a.b.oh{c{d,e,}{a,b},f{g,h}i}ah", [['a'], ['b'], [
            'oh',
            bg_glob.SequenceIn([
                'ca', 'cb', 'cda', 'cdb', 'cea', 'ceb', 'fgi', 'fhi'
            ]),
            'ah'
        ]], False),
        ("a.b{some, x{chars[!xyz], plop}}c",
         [['a'], ['b', bg_glob.AnySequence(), 'c']],
         False),
        # Negative examples: unbalanced brackets/braces are expected to stay
        # literal, and empty components are expected to be dropped.
        ("a[.b", [['a['], ['b']], True),
        ("a{.b", [['a{'], ['b']], True),
        ("a{.b.c}", [['a{'], ['b'], ['c}']], True),
        ("a.", [['a']], True),
        ("a..b", [['a'], ['b']], True),
    ]
    parser = bg_glob.GraphiteGlobParser()
    # The scenario index is not needed, so iterate over the triples directly.
    for glob, expected, fully_defined in scenarii:
        parsed = parser.parse(glob)
        self.assertSequenceEqual(expected, parsed)
        # `parsed` is passed as the failure message to ease debugging.
        self.assertEqual(fully_defined, parser.is_fully_defined(parsed), parsed)
def __generate_globstar_names_queries(self, table, components):
    """Build the name-selection queries for a pattern holding a globstar (**).

    Args:
      table: table name, forwarded to the query builder.
      components: parsed glob components; expected to contain GLOBSTAR.

    Returns:
      A list of queries as produced by __build_select_names_query().

    Raises:
      InvalidGlobError: if the pattern contains more than one globstar.
    """
    # Several globstars could cause a combinatorial explosion of queries.
    if components.count(GLOBSTAR) > 1:
        raise bg_accessor.InvalidGlobError(
            "Contains more than one globstar (**) operator")

    globstar_pos = components.index(GLOBSTAR)
    head = components[:globstar_pos]

    # A trailing globstar boils down to a single prefix search.
    if globstar_pos == len(components) - 1:
        return [self.__build_select_names_query(table, head)]

    # Otherwise emit one query per candidate depth, replacing the globstar
    # with an increasing number of "any sequence" components, bounded by both
    # the per-pattern query limit and the maximum number of components.
    tail = components[globstar_pos + 1:] + [[_LAST_COMPONENT]]
    upper_bound = min(
        self.max_queries_per_pattern,
        _COMPONENTS_MAX_LEN - len(components),
    )
    queries = []
    for depth in range(1, upper_bound):
        expanded = head + depth * [[bg_glob.AnySequence()]] + tail
        queries.append(self.__build_select_names_query(table, expanded))
    return queries
def setUp(self):
    """Prepare the glob component samples shared by the tests."""
    # Two wildcard nodes plus a plain string component.
    any_char = bg_glob.AnyChar()
    any_sequence = bg_glob.AnySequence()
    self._wildcard_samples = [any_char, any_sequence, "a"]

    # Nodes built from explicit (non-ASCII) alternatives.
    char_in = bg_glob.CharIn(["α", "β"])
    sequence_in = bg_glob.SequenceIn(["γ", "δ"])
    self._regexp_samples = [char_in, sequence_in]
def setUp(self):
    """Create the wildcard and regexp component samples used by the tests."""
    wildcards = []
    wildcards.append(bg_glob.AnyChar())
    wildcards.append(bg_glob.AnySequence())
    wildcards.append('a')  # A plain string component.
    self._wildcard_samples = wildcards

    regexps = []
    regexps.append(bg_glob.CharIn(['α', 'β']))
    regexps.append(bg_glob.SequenceIn(['γ', 'δ']))
    self._regexp_samples = regexps
def test_graphite_glob_parser(self):
    """Check that GraphiteGlobParser.parse() yields the expected components.

    Every case pairs a glob with the list of components expected from it.
    """
    any_char = bg_glob.AnyChar
    any_sequence = bg_glob.AnySequence
    globstar = bg_glob.Globstar
    sequence_in = bg_glob.SequenceIn

    cases = [
        # Positive examples
        ("a.b", [['a'], ['b']]),
        ("a?b.c", [['a', any_char(), 'b'], ['c']]),
        ("a.b*c", [['a'], ['b', any_sequence(), 'c']]),
        ("a.b**c", [['a'], ['b'], globstar(), ['c']]),
        ("a.**.c", [['a'], globstar(), ['c']]),
        ("a.**", [['a'], globstar()]),
        ("a[xyz].b", [['a', any_char()], ['b']]),
        ("a[!rat].b", [['a', any_char()], ['b']]),
        ("pl[a-ox]p", [['pl', any_char(), 'p']]),
        ("a[b-dopx-z]b.c", [['a', any_char(), 'b'], ['c']]),
        ("b[i\\]m", [['b', any_char(), 'm']]),
        ("a[x-xy]b", [['a', any_char(), 'b']]),
        ("a[y-xz]b", [['a', any_char(), 'b']]),
        ("a.b.{c,d}", [['a'], ['b'], [sequence_in(['c', 'd'])]]),
        (
            "a.b.{c,d}-{e,f}",
            [
                ['a'],
                ['b'],
                [sequence_in(['c', 'd']), '-', sequence_in(['e', 'f'])],
            ],
        ),
        (
            "a.b.oh{c{d,e,}{a,b},f{g,h}i}ah",
            [
                ['a'],
                ['b'],
                [
                    'oh',
                    sequence_in([
                        'ca', 'cb', 'cda', 'cdb', 'cea', 'ceb', 'fgi', 'fhi'
                    ]),
                    'ah',
                ],
            ],
        ),
        (
            "a.b{some, x{chars[!xyz], plop}}c",
            [['a'], ['b', any_sequence(), 'c']],
        ),
        ("a.{b}", [['a'], ['b']]),
        # Negative examples
        ("a[.b", [['a['], ['b']]),
        ("a{.b", [['a{'], ['b']]),
        ("a{.b.c}", [['a{'], ['b'], ['c}']]),
        ("a.", [['a']]),
        ("a..b", [['a'], ['b']]),
    ]

    parser = bg_glob.GraphiteGlobParser()
    for pattern, expected_components in cases:
        actual_components = parser.parse(pattern)
        self.assertSequenceEqual(expected_components, actual_components)
def glob_directory_names(self, glob, start_time=None, end_time=None):
    """See the real Accessor for a description.

    Returns an empty list when the glob is empty, when it contains a
    globstar, or when the response holds no "distinct_dirs" aggregation;
    otherwise an iterator over the sorted directory names.
    """
    super(_ElasticSearchAccessor, self).glob_directory_names(
        glob, start_time, end_time)
    tracing.add_attr_to_trace("glob", str(glob))
    if glob == "":
        return []

    components = self.__glob_parser.parse(glob)
    base_search = self._create_search_query(start_time, end_time)

    # Only "metric" documents exist, there are no "directory" documents.
    # So an AnySequence is appended to the glob in order to look for the
    # metrics living under that path.
    metric_components = components + [[bg_glob.AnySequence()]]
    has_globstar, search = self._search_metrics_from_components(
        glob, metric_components, base_search)
    if has_globstar:
        # TODO (t.chataigner) Add a log or raise exception.
        return []

    glob_depth = _get_depth_from_components(components)
    # Keep directories only: metrics whose depth is exactly glob_depth are
    # excluded by requiring a depth of at least (glob_depth + 1).
    min_depth = glob_depth + 1
    search = search.filter("range", depth={"gte": min_depth})
    # Do not return metrics, only the aggregation is of interest.
    search = search.extra(from_=0, size=0)
    bucket_field = "p%d" % glob_depth
    search.aggs.bucket(
        "distinct_dirs", "terms", field=bucket_field, size=MAX_QUERY_SIZE)

    log.debug(json.dumps(search.to_dict(), default=str))
    response = search.execute()

    # This may differ from the behavior of other drivers: the result is the
    # glob combined with each possible last component of a directory, not
    # the list of fully defined directory names.
    if "distinct_dirs" not in response.aggregations:
        # This happens when there is no index to search for the query.
        return []

    buckets = response.aggregations.distinct_dirs.buckets
    if glob_depth == 0:
        names = [bucket.key for bucket in buckets]
    else:
        parent = glob.rsplit(".", 1)[0]
        names = ["%s.%s" % (parent, bucket.key) for bucket in buckets]
    return iter(sorted(names))
def _search_directory_from_components(self, glob, components, search=None):
    """Build the search used to look up directory names.

    Args:
      glob: the original glob, forwarded to the metric search helper.
      components: parsed components of the glob.
      search: optional search to extend; a new one is created when None.

    Returns:
      The search, carrying a "distinct_dirs" terms aggregation.

    Raises:
      InvalidArgumentError: If the components include a globstar
    """
    # TODO (r.bizos) add unittest with directory index
    glob_depth = _get_depth_from_components(components)
    if search is None:
        search = self._create_search_query()

    if components.count(bg_glob.Globstar()):
        raise InvalidArgumentError(
            "Directory glob does not handle globstar")

    with_directory_index = self._directory_index_enabled
    if with_directory_index and self.__glob_parser.is_fully_defined(
            components):
        # Fully defined glob: match the directory by its exact name.
        full_name = ".".join([c[0] for c in components])
        search = search.filter("term", name=full_name)
    elif with_directory_index and self.__glob_parser.is_fully_defined(
            components[:-1]):
        # Fully defined parent, only usable with directory index.
        parent_name = DIRECTORY_SEPARATOR.join(
            [c[0] for c in components[:-1]])
        search = search.filter("term", parent=parent_name)
    else:
        if with_directory_index:
            # With a dedicated directory index an exact depth match is used
            # instead of a range, to avoid duplicates across indices.
            search = search.filter("term", depth=glob_depth)
        else:
            # Use (glob_depth + 1) to keep only directories and leave out
            # the metrics whose depth is glob_depth.
            search = search.filter("range", depth={"gte": glob_depth + 1})
        extended = components + [[bg_glob.AnySequence()]]
        components = extended
        _, search = self._search_metrics_from_components(
            glob, extended, search)

    # Do not return metrics nor directories, only the aggregation.
    search = search.extra(from_=0, size=0)
    search.aggs.bucket(
        "distinct_dirs",
        "terms",
        field="p%d" % glob_depth,
        size=MAX_QUERY_SIZE,
    )
    return search
from biggraphite import glob_utils
from biggraphite.drivers import cassandra_common


class Error(Exception):
    """Base class for all exceptions from this module."""


# Derives from Error (not directly from Exception) so that handlers catching
# this module's base Error also catch translation failures, as Error's
# docstring promises. GlobError remains an Exception subclass.
class GlobError(Error):
    """Base class for all translation exceptions from this module."""


# Graphite Abstract Syntax Tree supported types
GLOBSTAR = glob_utils.Globstar()
ANYSEQUENCE = glob_utils.AnySequence()

# Value stored in Cassandra columns component_? to mark the end of the metric name or pattern
END_MARK = cassandra_common.LAST_COMPONENT

# Template of a boolean filter; "%s" receives the content of the "must" list.
LUCENE_FILTER = """{ filter: { type: "boolean", must: [ %s ] } }"""

# Templates of individual conditions: the first "%s" is the field name,
# the second one is the value (exact match) or the regular expression.
FIELD_MATCH_VALUE = '{ field:"%s", type:"match", value:"%s" }'
FIELD_REGEX_VALUE = '{ field:"%s", type:"regexp", value:"%s" }'
def test_AnySequence_should_be_translated_into_an_asterisk(self):
    """A component holding only an AnySequence should translate to "*"."""
    component = [bg_glob.AnySequence()]
    # Only the second element of the returned pair is checked here.
    _ignored, translated = bg_elasticsearch.parse_complex_component(component)
    self.assertEqual(translated, "*")
def test_any_sequence_should_have_no_constraint(self):
    """A lone AnySequence component should parse to the pair (None, None)."""
    component = [bg_glob.AnySequence()]
    constraint = bg_elasticsearch.parse_simple_component(component)
    no_constraint = (None, None)
    self.assertEqual(constraint, no_constraint)
# limitations under the License. """Cassandra indexing code using SASI.""" from __future__ import absolute_import from __future__ import print_function import itertools from cassandra import encoder as c_encoder from biggraphite import glob_utils as bg_glob from biggraphite import accessor as bg_accessor from biggraphite.drivers import cassandra_common GLOBSTAR = bg_glob.Globstar() ANYSEQUENCE = bg_glob.AnySequence() _COMPONENTS_MAX_LEN = cassandra_common.COMPONENTS_MAX_LEN _LAST_COMPONENT = cassandra_common.LAST_COMPONENT DIRECTORY_SEPARATOR = cassandra_common.DIRECTORY_SEPARATOR METADATA_CREATION_CQL_PARENT_INDEXES = [ "CREATE CUSTOM INDEX IF NOT EXISTS ON \"%%(keyspace)s\".%(table)s (parent)" " USING 'org.apache.cassandra.index.sasi.SASIIndex'" " WITH OPTIONS = {" " 'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer'," " 'case_sensitive': 'true'" " };" % { "table": t } for t in ('metrics', 'directories') ]
def test_graphite_glob_parser(self):
    """Check GraphiteGlobParser.parse() and is_fully_defined() on sample globs.

    Each scenario is a triple: the glob to parse, the components expected
    from parse(), and the value expected from is_fully_defined() on them.
    """
    scenarii = [
        # Positive examples
        ("a.b", [["a"], ["b"]], True),
        ("a.{b}", [["a"], ["b"]], True),
        ("a?b.c", [["a", bg_glob.AnyChar(), "b"], ["c"]], False),
        ("a.b*c", [["a"], ["b", bg_glob.AnySequence(), "c"]], False),
        ("a.b**c", [["a"], ["b"], bg_glob.Globstar(), ["c"]], False),
        ("a.**.c", [["a"], bg_glob.Globstar(), ["c"]], False),
        ("a.**", [["a"], bg_glob.Globstar()], False),
        ("a[xyz].b", [["a", bg_glob.CharIn(["x", "y", "z"])], ["b"]], False),
        ("a[!rat].b", [["a", bg_glob.CharNotIn(["r", "a", "t"])], ["b"]], False),
        ("pl[a-ox]p", [["pl", bg_glob.CharIn(["a-o", "x"]), "p"]], False),
        (
            "a[b-dopx-z]b.c",
            [["a", bg_glob.CharIn(["b-d", "o", "p", "x-z"]), "b"], ["c"]],
            False,
        ),
        ("b[i\\]m", [["b", bg_glob.CharIn(["\\", "i"]), "m"]], False),
        ("a[x-xy]b", [["a", bg_glob.CharIn(["x-x", "y"]), "b"]], False),
        ("a[y-xz]b", [["a", bg_glob.CharIn(["y-x", "z"]), "b"]], False),
        ("a.b.{c,d}", [["a"], ["b"], [bg_glob.SequenceIn(["c", "d"])]], False),
        (
            "a.b.{c,d}-{e,f}",
            [
                ["a"],
                ["b"],
                [
                    bg_glob.SequenceIn(["c", "d"]),
                    "-",
                    bg_glob.SequenceIn(["e", "f"]),
                ],
            ],
            False,
        ),
        (
            "a.b.oh{c{d,e,}{a,b},f{g,h}i}ah",
            [
                ["a"],
                ["b"],
                [
                    "oh",
                    bg_glob.SequenceIn([
                        "ca", "cb", "cda", "cdb", "cea", "ceb", "fgi", "fhi"
                    ]),
                    "ah",
                ],
            ],
            False,
        ),
        (
            "a.b{some, x{chars[!xyz], plop}}c",
            [["a"], ["b", bg_glob.AnySequence(), "c"]],
            False,
        ),
        # Negative examples: unbalanced brackets/braces are expected to stay
        # literal, and empty components are expected to be dropped.
        ("a[.b", [["a["], ["b"]], True),
        ("a{.b", [["a{"], ["b"]], True),
        ("a{.b.c}", [["a{"], ["b"], ["c}"]], True),
        ("a.", [["a"]], True),
        ("a..b", [["a"], ["b"]], True),
    ]
    parser = bg_glob.GraphiteGlobParser()
    # The scenario index is not needed, so iterate over the triples directly.
    for glob, expected, fully_defined in scenarii:
        parsed = parser.parse(glob)
        self.assertSequenceEqual(expected, parsed)
        # `parsed` is passed as the failure message to ease debugging.
        self.assertEqual(fully_defined, parser.is_fully_defined(parsed), parsed)