Esempio n. 1
0
 def test_graphite_glob_parser(self):
     """Check GraphiteGlobParser.parse() and is_fully_defined() on sample globs.

     Every scenario is a ``(glob, expected, fully_defined)`` tuple: the glob
     is parsed, the parsed components are compared with ``expected`` and the
     result of ``is_fully_defined()`` on them is compared with
     ``fully_defined``.
     """
     scenarii = [
         # Positive examples
         ("a.b", [['a'], ['b']], True),
         ("a.{b}", [['a'], ['b']], True),
         ("a?b.c", [['a', bg_glob.AnyChar(), 'b'], ['c']], False),
         ("a.b*c", [['a'], ['b', bg_glob.AnySequence(), 'c']], False),
         ("a.b**c", [['a'], ['b'], bg_glob.Globstar(), ['c']], False),
         ("a.**.c", [['a'], bg_glob.Globstar(), ['c']], False),
         ("a.**", [['a'], bg_glob.Globstar()], False),
         ("a[xyz].b", [['a', bg_glob.CharIn(['x', 'y', 'z'])],
                       ['b']], False),
         ("a[!rat].b", [['a', bg_glob.CharNotIn(['r', 'a', 't'])],
                        ['b']], False),
         ("pl[a-ox]p", [['pl', bg_glob.CharIn(['a-o', 'x']), 'p']], False),
         ("a[b-dopx-z]b.c",
          [['a', bg_glob.CharIn(['b-d', 'o', 'p', 'x-z']), 'b'],
           ['c']], False),
         ("b[i\\]m", [['b', bg_glob.CharIn(['\\', 'i']), 'm']], False),
         ("a[x-xy]b", [['a', bg_glob.CharIn(['x-x', 'y']), 'b']], False),
         ("a[y-xz]b", [['a', bg_glob.CharIn(['y-x', 'z']), 'b']], False),
         ("a.b.{c,d}", [['a'], ['b'], [bg_glob.SequenceIn(['c',
                                                           'd'])]], False),
         ("a.b.{c,d}-{e,f}", [['a'], ['b'],
                              [
                                  bg_glob.SequenceIn(['c', 'd']), '-',
                                  bg_glob.SequenceIn(['e', 'f'])
                              ]], False),
         ("a.b.oh{c{d,e,}{a,b},f{g,h}i}ah", [['a'], ['b'],
                                             [
                                                 'oh',
                                                 bg_glob.SequenceIn([
                                                     'ca', 'cb', 'cda',
                                                     'cdb', 'cea', 'ceb',
                                                     'fgi', 'fhi'
                                                 ]), 'ah'
                                             ]], False),
         ("a.b{some, x{chars[!xyz], plop}}c",
          [['a'], ['b', bg_glob.AnySequence(), 'c']], False),
         # Negative examples
         ("a[.b", [['a['], ['b']], True),
         ("a{.b", [['a{'], ['b']], True),
         ("a{.b.c}", [['a{'], ['b'], ['c}']], True),
         ("a.", [['a']], True),
         ("a..b", [['a'], ['b']], True),
     ]
     parser = bg_glob.GraphiteGlobParser()
     # The scenario index was never used: iterate over the table directly.
     for glob, expected, fully_defined in scenarii:
         parsed = parser.parse(glob)
         # Name the glob in the failure message so the broken scenario is
         # identifiable without a debugger.
         self.assertSequenceEqual(expected, parsed, msg=glob)
         self.assertEqual(fully_defined, parser.is_fully_defined(parsed),
                          parsed)
Esempio n. 2
0
    def __generate_globstar_names_queries(self, table, components):
        """Build the name-selection queries for a pattern holding a globstar.

        Args:
          table: passed as-is to __build_select_names_query().
          components: list of parsed glob components containing GLOBSTAR.

        Returns:
          A list of queries built by __build_select_names_query(): a single
          query on the prefix when the globstar is the last component,
          otherwise one query per number of AnySequence components
          substituted for the globstar.

        Raises:
          bg_accessor.InvalidGlobError: if there is more than one globstar.
        """
        # Handling more than one of these can cause combinatorial explosion.
        if components.count(GLOBSTAR) > 1:
            raise bg_accessor.InvalidGlobError(
                "Contains more than one globstar (**) operator")

        globstar_pos = components.index(GLOBSTAR)
        head = components[:globstar_pos]
        tail = components[globstar_pos + 1:]

        # A trailing globstar boils down to a prefix search.
        if not tail:
            return [self.__build_select_names_query(table, head)]

        # Otherwise generate incremental queries, replacing the globstar by a
        # growing number of wildcards, up to a certain depth
        # (_COMPONENTS_MAX_LEN - #components).
        closed_tail = tail + [[_LAST_COMPONENT]]
        max_wildcards = min(self.max_queries_per_pattern,
                            _COMPONENTS_MAX_LEN - len(components))
        queries = []
        for wildcards in range(1, max_wildcards):
            expanded = head + wildcards * [[bg_glob.AnySequence()]] + closed_tail
            queries.append(self.__build_select_names_query(table, expanded))
        return queries
Esempio n. 3
0
 def setUp(self):
     """Create the sample glob components used by the tests."""
     # Two wildcard markers plus a plain literal.
     wildcards = [bg_glob.AnyChar(), bg_glob.AnySequence()]
     wildcards.append("a")
     self._wildcard_samples = wildcards

     # Components built from explicit alternatives (non-ASCII on purpose).
     char_choice = bg_glob.CharIn(["α", "β"])
     sequence_choice = bg_glob.SequenceIn(["γ", "δ"])
     self._regexp_samples = [char_choice, sequence_choice]
Esempio n. 4
0
 def setUp(self):
     """Store sample wildcard and alternative-based components on the test."""
     any_char = bg_glob.AnyChar()
     any_sequence = bg_glob.AnySequence()
     # The last sample is a plain literal, not a wildcard object.
     self._wildcard_samples = [any_char, any_sequence, 'a']

     one_char_among = bg_glob.CharIn(['α', 'β'])
     one_sequence_among = bg_glob.SequenceIn(['γ', 'δ'])
     self._regexp_samples = [one_char_among, one_sequence_among]
Esempio n. 5
0
 def test_graphite_glob_parser(self):
     """Parse sample globs and compare the components with expectations.

     Each case pairs a glob with the component list that
     GraphiteGlobParser.parse() is expected to return for it.
     """
     cases = [
         # Positive examples
         ("a.b", [['a'], ['b']]),
         ("a?b.c", [['a', bg_glob.AnyChar(), 'b'], ['c']]),
         ("a.b*c", [['a'], ['b', bg_glob.AnySequence(), 'c']]),
         ("a.b**c", [['a'], ['b'], bg_glob.Globstar(), ['c']]),
         ("a.**.c", [['a'], bg_glob.Globstar(), ['c']]),
         ("a.**", [['a'], bg_glob.Globstar()]),
         # Character classes are all expected to come back as AnyChar.
         ("a[xyz].b", [['a', bg_glob.AnyChar()], ['b']]),
         ("a[!rat].b", [['a', bg_glob.AnyChar()], ['b']]),
         ("pl[a-ox]p", [['pl', bg_glob.AnyChar(), 'p']]),
         ("a[b-dopx-z]b.c", [['a', bg_glob.AnyChar(), 'b'], ['c']]),
         ("b[i\\]m", [['b', bg_glob.AnyChar(), 'm']]),
         ("a[x-xy]b", [['a', bg_glob.AnyChar(), 'b']]),
         ("a[y-xz]b", [['a', bg_glob.AnyChar(), 'b']]),
         # Brace alternatives.
         ("a.b.{c,d}", [['a'], ['b'], [bg_glob.SequenceIn(['c', 'd'])]]),
         (
             "a.b.{c,d}-{e,f}",
             [
                 ['a'],
                 ['b'],
                 [
                     bg_glob.SequenceIn(['c', 'd']),
                     '-',
                     bg_glob.SequenceIn(['e', 'f']),
                 ],
             ],
         ),
         (
             "a.b.oh{c{d,e,}{a,b},f{g,h}i}ah",
             [
                 ['a'],
                 ['b'],
                 [
                     'oh',
                     bg_glob.SequenceIn([
                         'ca', 'cb', 'cda', 'cdb',
                         'cea', 'ceb', 'fgi', 'fhi',
                     ]),
                     'ah',
                 ],
             ],
         ),
         (
             "a.b{some, x{chars[!xyz], plop}}c",
             [['a'], ['b', bg_glob.AnySequence(), 'c']],
         ),
         ("a.{b}", [['a'], ['b']]),
         # Negative examples
         ("a[.b", [['a['], ['b']]),
         ("a{.b", [['a{'], ['b']]),
         ("a{.b.c}", [['a{'], ['b'], ['c}']]),
         ("a.", [['a']]),
         ("a..b", [['a'], ['b']]),
     ]
     glob_parser = bg_glob.GraphiteGlobParser()
     for pattern, wanted in cases:
         self.assertSequenceEqual(wanted, glob_parser.parse(pattern))
Esempio n. 6
0
    def glob_directory_names(self, glob, start_time=None, end_time=None):
        """See the real Accessor for a description.

        Returns an empty list when the glob is empty, contains a globstar or
        when the response holds no "distinct_dirs" aggregation; otherwise an
        iterator over the sorted names.
        """
        super(_ElasticSearchAccessor,
              self).glob_directory_names(glob, start_time, end_time)
        tracing.add_attr_to_trace("glob", str(glob))

        if glob == "":
            return []

        parsed = self.__glob_parser.parse(glob)
        base_query = self._create_search_query(start_time, end_time)
        # There are no "directory" documents, only "metric" documents. Hence
        # the AnySequence appended after the provided glob: we search for
        # metrics under that path.
        has_globstar, query = self._search_metrics_from_components(
            glob, parsed + [[bg_glob.AnySequence()]], base_query)
        if has_globstar:
            # TODO (t.chataigner) Add a log or raise exception.
            return []

        depth = _get_depth_from_components(parsed)
        # Use (depth + 1) to filter only directories and
        # exclude metrics whose depth is exactly the glob depth.
        query = query.filter("range", depth={"gte": depth + 1})
        # Do not return metrics, only the aggregation matters.
        query = query.extra(from_=0, size=0)
        query.aggs.bucket("distinct_dirs",
                          "terms",
                          field="p%d" % depth,
                          size=MAX_QUERY_SIZE)

        log.debug(json.dumps(query.to_dict(), default=str))
        response = query.execute()

        # This may not be the same behavior as other drivers.
        # It returns the glob with the list of possible last component for a
        # directory. It doesn't return the list of fully defined directory
        # names.
        if "distinct_dirs" not in response.aggregations:
            # This happens when there is no index to search for the query.
            return []

        last_components = [
            bucket.key
            for bucket in response.aggregations.distinct_dirs.buckets
        ]
        if depth == 0:
            names = last_components
        else:
            parent_glob = glob.rsplit(".", 1)[0]
            names = [
                "%s.%s" % (parent_glob, last) for last in last_components
            ]
        return iter(sorted(names))
Esempio n. 7
0
    def _search_directory_from_components(self, glob, components, search=None):
        """Assembles a query to search directory names.

        Args:
          glob: the glob, forwarded to _search_metrics_from_components().
          components: the parsed components of the glob.
          search: search to refine; one is created with
            _create_search_query() when None.

        Returns:
          The search, configured to return no document and carrying a
          "distinct_dirs" terms aggregation on the "p<depth>" field.

        Raises:
          InvalidArgumentError: If the components include a globstar
        """
        # TODO (r.bizos) add unittest with directory index
        depth = _get_depth_from_components(components)
        if search is None:
            search = self._create_search_query()
        if components.count(bg_glob.Globstar()):
            raise InvalidArgumentError(
                "Directory glob does not handle globstar")

        if not self._directory_index_enabled:
            # Use (depth + 1) to filter only directories and
            # exclude metrics whose depth is exactly the glob depth.
            search = search.filter("range", depth={"gte": depth + 1})
            _, search = self._search_metrics_from_components(
                glob, components + [[bg_glob.AnySequence()]], search)
        elif self.__glob_parser.is_fully_defined(components):
            full_name = ".".join(c[0] for c in components)
            search = search.filter("term", name=full_name)
        elif self.__glob_parser.is_fully_defined(components[:-1]):
            # fully defined parent, only usable with directory index
            parent_name = DIRECTORY_SEPARATOR.join(
                c[0] for c in components[:-1])
            search = search.filter("term", parent=parent_name)
        else:
            # When using a second index for directories we don't need range
            # aggregation to prevent having duplicates across indices
            search = search.filter("term", depth=depth)
            _, search = self._search_metrics_from_components(
                glob, components, search)

        # Do not return metrics nor directories, only the aggregation.
        search = search.extra(from_=0, size=0)
        search.aggs.bucket("distinct_dirs",
                           "terms",
                           field="p%d" % depth,
                           size=MAX_QUERY_SIZE)
        return search
Esempio n. 8
0
from biggraphite import glob_utils
from biggraphite.drivers import cassandra_common


class Error(Exception):
    """Base class for all exceptions from this module."""


class GlobError(Error):
    """Base class for all translation exceptions from this module.

    Derives from Error so that handlers catching the module-wide base class
    also receive translation failures.
    """


# Graphite Abstract Syntax Tree supported types
# Module-level instances of the glob_utils marker classes.
# NOTE(review): presumably meant to be compared against parsed glob
# components -- confirm against the code using these constants.
GLOBSTAR = glob_utils.Globstar()
ANYSEQUENCE = glob_utils.AnySequence()

# Value stored in Cassandra columns component_? to mark the end of the metric name or pattern
# (alias of cassandra_common.LAST_COMPONENT).
END_MARK = cassandra_common.LAST_COMPONENT

# Skeleton of a Lucene boolean filter; the single "%s" placeholder receives
# the content of the "must" list.
LUCENE_FILTER = "\n".join([
    "{",
    "    filter: {",
    '        type: "boolean",',
    "        must: [",
    "            %s",
    "        ]",
    "    }",
    "}",
])

# Per-field conditions: both keep two "%s" placeholders (field, then value)
# and only differ by their "type".
_FIELD_CONDITION = '{ field:"%%s", type:"%s", value:"%%s" }'
FIELD_MATCH_VALUE = _FIELD_CONDITION % "match"
FIELD_REGEX_VALUE = _FIELD_CONDITION % "regexp"
Esempio n. 9
0
 def test_AnySequence_should_be_translated_into_an_asterisk(self):
     """A component made of a lone AnySequence must be rendered as "*"."""
     lone_any_sequence = [bg_glob.AnySequence()]
     # Only the second element of the returned pair is checked here.
     _ignored, translated = bg_elasticsearch.parse_complex_component(
         lone_any_sequence)
     self.assertEqual(translated, "*")
Esempio n. 10
0
 def test_any_sequence_should_have_no_constraint(self):
     """parse_simple_component() must return (None, None) for AnySequence."""
     lone_any_sequence = [bg_glob.AnySequence()]
     constraint = bg_elasticsearch.parse_simple_component(lone_any_sequence)
     self.assertEqual(constraint, (None, None))
Esempio n. 11
0
# limitations under the License.
"""Cassandra indexing code using SASI."""

from __future__ import absolute_import
from __future__ import print_function

import itertools

from cassandra import encoder as c_encoder

from biggraphite import glob_utils as bg_glob
from biggraphite import accessor as bg_accessor
from biggraphite.drivers import cassandra_common

# Module-level instances of the glob marker classes from glob_utils.
# NOTE(review): presumably meant to be compared against parsed glob
# components -- confirm against the code using these constants.
GLOBSTAR = bg_glob.Globstar()
ANYSEQUENCE = bg_glob.AnySequence()

# Local aliases of constants defined in cassandra_common.
_COMPONENTS_MAX_LEN = cassandra_common.COMPONENTS_MAX_LEN
_LAST_COMPONENT = cassandra_common.LAST_COMPONENT
DIRECTORY_SEPARATOR = cassandra_common.DIRECTORY_SEPARATOR

# Statement creating the SASI index on the "parent" column of one table.
# "%(table)s" is filled in below; the escaped "%%(keyspace)s" survives as a
# "%(keyspace)s" placeholder in the resulting statements.
_PARENT_INDEX_CQL = (
    "CREATE CUSTOM INDEX IF NOT EXISTS ON \"%%(keyspace)s\".%(table)s (parent)"
    "  USING 'org.apache.cassandra.index.sasi.SASIIndex'"
    "  WITH OPTIONS = {"
    "    'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer',"
    "    'case_sensitive': 'true'"
    "  };"
)

# One statement per table, "metrics" first.
METADATA_CREATION_CQL_PARENT_INDEXES = [
    _PARENT_INDEX_CQL % dict(table=table_name)
    for table_name in ("metrics", "directories")
]
Esempio n. 12
0
 def test_graphite_glob_parser(self):
     """Check GraphiteGlobParser.parse() and is_fully_defined() on sample globs.

     Every scenario is a ``(glob, expected, fully_defined)`` tuple: the glob
     is parsed, the parsed components are compared with ``expected`` and the
     result of ``is_fully_defined()`` on them is compared with
     ``fully_defined``.
     """
     scenarii = [
         # Positive examples
         ("a.b", [["a"], ["b"]], True),
         ("a.{b}", [["a"], ["b"]], True),
         ("a?b.c", [["a", bg_glob.AnyChar(), "b"], ["c"]], False),
         ("a.b*c", [["a"], ["b", bg_glob.AnySequence(), "c"]], False),
         ("a.b**c", [["a"], ["b"], bg_glob.Globstar(), ["c"]], False),
         ("a.**.c", [["a"], bg_glob.Globstar(), ["c"]], False),
         ("a.**", [["a"], bg_glob.Globstar()], False),
         ("a[xyz].b", [["a", bg_glob.CharIn(["x", "y", "z"])],
                       ["b"]], False),
         ("a[!rat].b", [["a", bg_glob.CharNotIn(["r", "a", "t"])],
                        ["b"]], False),
         ("pl[a-ox]p", [["pl", bg_glob.CharIn(["a-o", "x"]), "p"]], False),
         (
             "a[b-dopx-z]b.c",
             [["a", bg_glob.CharIn(["b-d", "o", "p", "x-z"]), "b"], ["c"]],
             False,
         ),
         ("b[i\\]m", [["b", bg_glob.CharIn(["\\", "i"]), "m"]], False),
         ("a[x-xy]b", [["a", bg_glob.CharIn(["x-x", "y"]), "b"]], False),
         ("a[y-xz]b", [["a", bg_glob.CharIn(["y-x", "z"]), "b"]], False),
         ("a.b.{c,d}", [["a"], ["b"], [bg_glob.SequenceIn(["c",
                                                           "d"])]], False),
         (
             "a.b.{c,d}-{e,f}",
             [
                 ["a"],
                 ["b"],
                 [
                     bg_glob.SequenceIn(["c", "d"]),
                     "-",
                     bg_glob.SequenceIn(["e", "f"]),
                 ],
             ],
             False,
         ),
         (
             "a.b.oh{c{d,e,}{a,b},f{g,h}i}ah",
             [
                 ["a"],
                 ["b"],
                 [
                     "oh",
                     bg_glob.SequenceIn([
                         "ca", "cb", "cda", "cdb", "cea", "ceb", "fgi",
                         "fhi"
                     ]),
                     "ah",
                 ],
             ],
             False,
         ),
         (
             "a.b{some, x{chars[!xyz], plop}}c",
             [["a"], ["b", bg_glob.AnySequence(), "c"]],
             False,
         ),
         # Negative examples
         ("a[.b", [["a["], ["b"]], True),
         ("a{.b", [["a{"], ["b"]], True),
         ("a{.b.c}", [["a{"], ["b"], ["c}"]], True),
         ("a.", [["a"]], True),
         ("a..b", [["a"], ["b"]], True),
     ]
     parser = bg_glob.GraphiteGlobParser()
     # The scenario index was never used: iterate over the table directly.
     for glob, expected, fully_defined in scenarii:
         parsed = parser.parse(glob)
         # Name the glob in the failure message so the broken scenario is
         # identifiable without a debugger.
         self.assertSequenceEqual(expected, parsed, msg=glob)
         self.assertEqual(fully_defined, parser.is_fully_defined(parsed),
                          parsed)