Example 1
    def __repr__(self):
        # print([t for t in self.tokens.items()])
        if 'singleterm' in self.tokens:
            if self.tokens.fieldname == '_exists_':
                return '"attributes"::jsonb ? \'{}\''.format(self.tokens.singleterm)
            elif self.tokens.fieldname in ['correlate', 'service', 'tags']:
                return '\'{}\'=ANY("{}")'.format(self.tokens.singleterm, self.tokens.field[0])
            else:
                return '"{}" ILIKE \'%%{}%%\''.format(self.tokens.field[0], self.tokens.singleterm)
        if 'phrase' in self.tokens:
            if self.tokens.field[0] == '__default_field__':
                return '"{}" ~* \'\\y{}\\y\''.format('__default_field__', self.tokens.phrase)
            elif self.tokens.field[0] in ['correlate', 'service', 'tags']:
                return '\'{}\'=ANY("{}")'.format(self.tokens.phrase, self.tokens.field[0])
            else:
                return '"{}" ~* \'\\y{}\\y\''.format(self.tokens.field[0], self.tokens.phrase)
        if 'wildcard' in self.tokens:
            return '"{}" ~* \'\\y{}\\y\''.format(self.tokens.field[0], self.tokens.wildcard)
        if 'regex' in self.tokens:
            return '"{}" ~* \'{}\''.format(self.tokens.field[0], self.tokens.regex)
        if 'range' in self.tokens:
            if self.tokens.range[0].lowerbound == '*':
                lower_term = '1=1'
            else:
                lower_term = '"{}" {} \'{}\''.format(
                    self.tokens.field[0],
                    '>=' if 'inclusive' in self.tokens.range[0] else '>',
                    self.tokens.range[0].lowerbound
                )

            if self.tokens.range[2].upperbound == '*':
                upper_term = '1=1'
            else:
                upper_term = '"{}" {} \'{}\''.format(
                    self.tokens.field[0],
                    '<=' if 'inclusive' in self.tokens.range[2] else '<',
                    self.tokens.range[2].upperbound
                )
            return '({} AND {})'.format(lower_term, upper_term)
        if 'onesidedrange' in self.tokens:
            return '("{}" {} \'{}\')'.format(
                self.tokens.field[0],
                self.tokens.onesidedrange.op,
                self.tokens.onesidedrange.bound
            )
        if 'subquery' in self.tokens:
            return '{}'.format(self.tokens.subquery[0]).replace('__default_field__', self.tokens.field[0])

        raise ParseException('Search term did not match query syntax: %s' % self.tokens)
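In pyparsing, a node class like the one above is normally attached to a grammar expression with `setParseAction`, so that the matched tokens are wrapped in an object whose `repr()` yields the SQL fragment. A minimal, self-contained sketch of that wiring (the `Node` class and the tiny `field:value` grammar below are illustrative assumptions, not the project's actual grammar):

import pyparsing as pp

class Node:
    # hypothetical stand-in for the query-term class above
    def __init__(self, tokens):
        self.tokens = tokens

    def __repr__(self):
        # render a field:value term as a SQL ILIKE predicate
        return '"{}" ILIKE \'%%{}%%\''.format(self.tokens.field, self.tokens.singleterm)

field = pp.Word(pp.alphas, pp.alphanums + '_')('field')
value = pp.Word(pp.alphanums)('singleterm')
term = (field + pp.Suppress(':') + value).setParseAction(Node)

print(repr(term.parseString('status:open')[0]))  # "status" ILIKE '%%open%%'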
Example 2
    def loadTestsFromFile(self, filename, indexes=[]):
        log.debug("Loading from file %s" % filename)

        step_registry = StepImplRegistry(TagMatcher)
        try:
            feat = load_feature(filename, self.language)
            path = os.path.dirname(filename)
            self.impl_loader.load_steps_impl(step_registry, path,
                                             feat.use_step_defs)
        except ParseException as e:
            ec, ev, tb = sys.exc_info()
            yield Failure(
                ParseException,
                ParseException(e.pstr, e.loc, e.msg + " in %s" % filename), tb)
            return
Example 3
def _get_final_tree(s):
    """
    Return final tree after merge and clean.

    Raises pyparsing.ParseException if s is invalid.
    """
    try:
        tokenized = tokenizer.parseString(s)
        parsed = parser.parse(tokenized)
        merged = _merge_children(next(parsed), {'S', 'group'})
        final = _clean_parse_tree(merged)
        return final
    except StopIteration:
        # This happens with an empty tree; seen with inputs such as 'H2O(' or 'Xe+'.
        raise ParseException("Shouldn't have empty trees")
Example 4
    def add_sections_info(self, sections_info_file):
        first_line = sections_info_file.readline()

        archive_path = Literal("In archive").suppress() + Regex(r"[^:]+").setResultsName("archive_path") + Literal(":").suppress()
        parser = archive_path

        results = None

        try:
            results = parser.parseString(first_line)
        except ParseException as p:
            raise ParseException("File " + sections_info_file.name + " is not a valid sections info file. " + p.message)

        archive = os.path.basename(results.archive_path)
        self.sections[archive] = SectionsInfo.__info(sections_info_file.name, sections_info_file.read())
Example 5
def parse(sql):
    try:
        parse_result = SQLParser.parseString(sql, parseAll=True)
    except Exception as e:
        if isinstance(e, ParseException) and e.msg == "Expected end of text":
            problems = all_exceptions.get(e.loc, [])
            expecting = [
                f for f in (
                    set(p.msg.lstrip("Expected").strip()
                        for p in problems) - {"Found unwanted token"})
                if not f.startswith("{")
            ]
            raise ParseException(
                sql, e.loc,
                "Expecting one of (" + (", ".join(expecting)) + ")")
        raise
    return _scrub(parse_result)
Example 6
def _parse_ipv6(tokens):
    """
    Helper function to parse IPv6 addresses.

    """
    match = str(tokens[0])
    ipv6 = None
    try:
        ipv6 = ipaddress.IPv6Address(match)
    except ipaddress.AddressValueError:
        try:
            ipv6 = ipaddress.IPv6Network(match, strict=False)
        except (ipaddress.AddressValueError, ipaddress.NetmaskValueError):
            pass
    if not ipv6:
        raise ParseException("Non valid IPv6 address/network.")
    return str(ipv6)
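A short usage sketch for the helper above (only `_parse_ipv6` comes from the original; the token pattern and grammar are assumptions): attached as a parse action, it validates the matched text with the `ipaddress` module, returns the normalised form, and raises `ParseException` for anything that is neither a valid IPv6 address nor a network.

from pyparsing import Regex

ipv6_token = Regex(r"[0-9A-Fa-f:]+(?:/\d{1,3})?").setParseAction(_parse_ipv6)

print(ipv6_token.parseString("2001:db8::1")[0])    # 2001:db8::1
print(ipv6_token.parseString("2001:db8::/32")[0])  # 2001:db8::/32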
Example 7
    def evaluate_stack(self, expr):
        """
        Recursively reads the next expression from the stack.
        Looks up supported operators and/or functions and applies
        them.
        """
        operator = expr.pop()
        if operator == self.UNARY:
            return -self.evaluate_stack(expr)

        if operator in self.operators:
            rhs, lhs = self.evaluate_stack(expr), self.evaluate_stack(expr)
            if operator in self.bool_operators:
                if not rhs:
                    rhs = False
                if not lhs:
                    lhs = False
            if operator == "in":
                rhs, lhs = lhs, rhs
            if isinstance(lhs, ParseResults):
                lhs = [self._get_return_value(item) for item in lhs]
            return self.operators[operator](lhs, rhs)
        elif operator in self.functions:
            return self.functions[operator](self.evaluate_stack(expr))
        elif operator in self.constants:
            return self.constants[operator]
        elif operator and operator[0] == "$":
            variable = operator[1:]

            if variable in [v["name"] for v in self.reserved_variables]:
                return self._get_reserved_variable(variable)

            if not self.user:
                raise ParseException(
                    "No user instance set. Please initialize the %s "
                    "with a `user_obj` argument." % self.__class__.__name__)

            return self._get_return_value(self.userdata.get(variable))
        elif operator in ("True", "False"):
            return True if operator == "True" else False
        elif operator == "None":
            return None
        else:
            return self._get_return_value(operator)
Example 8
    def add_sections_info(self, sections_info_dump):
        first_line = sections_info_dump.readline()

        archive_path = (Literal('In archive').suppress() +
                        White().suppress() +
                        # trim the colon and line ending characters from archive_path
                        restOfLine.setResultsName('archive_path').setParseAction(lambda s, loc, toks: s.rstrip(':\n\r ')))
        parser = archive_path

        results = None

        try:
            results = parser.parseString(first_line, parseAll=True)
        except ParseException as p:
            raise ParseException('Parsing sections info for library ' + sections_info_dump.name + ' failed. ' + p.msg)

        archive = os.path.basename(results.archive_path)
        self.sections[archive] = SectionsInfo.__info(sections_info_dump.name, sections_info_dump.read())
Example 9
    def __init__(self, t):
        if t[0] not in ["value", "count", "unique"]:
            raise ParseException(f"Only `value`, `count`, `unique` functions are supported but `{t[0]}` received.")
        if t[0] == "value":
            self.column = "sum_value" if t[0] == "value" else "count"
            self.column_sqr = "sum_sqr_value" if t[0] == "value" else "sum_sqr_count"
        if t[0] == "count":
            self.column = "count"
            self.column_sqr = "sum_sqr_count"
        if t[0] == "unique":
            self.column = "count_unique"
            self.column_sqr = "count_unique"

        self.unit_type = t[2].unit_type
        self.agg_type = t[4].agg_type
        self.goal = t[6].goal
        self.dimension = t[8].dimension if len(t) > 8 else ""
        self.dimension_value = t[10].dimension_value if len(t) > 8 else ""
Example 10
    def add_sections_info(self, sections_info_dump):
        first_line = sections_info_dump.readline()

        archive_path = (Literal("In archive").suppress() +
                        # trim the last character from archive_path, :
                        Word(printables + " ").setResultsName("archive_path").setParseAction(lambda t: t[0][:-1]) +
                        LineEnd())
        parser = archive_path

        results = None

        try:
            results = parser.parseString(first_line)
        except ParseException as p:
            raise ParseException("Parsing sections info for library " + sections_info_dump.name + " failed. " + p.msg)

        archive = os.path.basename(results.archive_path)
        self.sections[archive] = SectionsInfo.__info(sections_info_dump.name, sections_info_dump.read())
Example 11
    def __init__(self, t):
        if t[0] not in ['value', 'count', 'unique']:
            raise ParseException(f'Only `value`, `count`, `unique` functions are supported but `{t[0]}` received.')
        if t[0] == 'value':
            self.column = 'sum_value'
            self.column_sqr = 'sum_sqr_value'
        if t[0] == 'count':
            self.column = 'count'
            self.column_sqr = 'sum_sqr_count'
        if t[0] == 'unique':
            self.column = 'count_unique'
            self.column_sqr = 'count_unique'

        self.unit_type = t[2].unit_type
        self.agg_type = t[4].agg_type
        self.goal = t[6].goal
        self.dimension = t[8].dimension if len(t) > 8 else ''
        self.dimension_value = t[10].dimension_value if len(t) > 8 else ''
Example 12
    def convertTimeList(self, tokens):
        output = []
        timeFormatValue = {
            "A1": "year",
            "H1": "half",
            "Q1": "quarter",
            "M1": "month",
            "W1": "week",
        }
        try:
            timeFormat = timeFormatValue[tokens[0]]
        except KeyError:
            raise ParseException("{0} is not a valid time code.".format(
                tokens[0]))

        for timeValue in tokens[1]:
            timeValue = str(timeValue)
            period = {
                "frequency": timeFormat,
                "year": "",
                "number": "",
            }
            if timeFormat == "year":
                period["year"] = timeValue
            elif timeFormat == "half":
                halfyearDate = re.match(r"(\d{4})([1-2])", timeValue)
                period["year"] = halfyearDate.group(1)
                period["number"] = halfyearDate.group(2)
            elif timeFormat == "quarter":
                quarterDate = re.match(r"(\d{4})([1-4])", timeValue)
                period["year"] = quarterDate.group(1)
                period["number"] = quarterDate.group(2)
            elif timeFormat == "month":
                monthDate = re.match(r"(\d{4})(\d{2})", timeValue)
                period["year"] = monthDate.group(1)
                period["number"] = monthDate.group(2)
            elif timeFormat == "week":
                weekDate = re.match(r"(\d{4})(\d{2})", timeValue)
                period["year"] = weekDate.group(1)
                period["number"] = weekDate.group(2)
            output.append(period)

        return output
Example 13
    def __init__(self, tokens):
        tokens = tokens[0]
        if len(tokens) % 2 == 1:
            self.op_token = tokens[1]
            self.comparators = tokens[::2]
        else:
            err = "Invalid number of infix expressions: {}"
            err = err.format(len(tokens))
            raise ParseException(err)
        assert self.op_token in self.keyword_aliases

        # Check for too many literals and not enough keywords
        op = self.keyword_aliases[self.op_token]
        if isinstance(op, ast.boolop):
            if any(isinstance(c, Literal) for c in self.comparators):
                raise ValueError("Cannot use literals as truth")
        else:
            if all(isinstance(c, Literal) for c in self.comparators):
                raise ValueError("Cannot compare literals.")
Example 14
def _merge_children(tree, tags):
    ''' nltk, by documentation, cannot do arbitrary length
    groups. Instead of:
    (group 1 2 3 4)
    It has to handle this recursively:
    (group 1 (group 2 (group 3 (group 4))))
    We do the cleanup of converting from the latter to the former.
    '''
    if tree is None:
        # There was a problem--shouldn't have empty trees (NOTE: see this with input e.g. 'H2O(', or 'Xe+').
        # Haven't grokked the code to tell if this is indeed the right thing to do.
        raise ParseException("Shouldn't have empty trees")

    if type(tree) == str:
        return tree

    merged_children = []
    done = False
    #print '00000', tree
    ## Merge current tag
    while not done:
        done = True
        for child in tree:
            if (type(child) == nltk.tree.Tree and child.node == tree.node
                    and tree.node in tags):
                merged_children = merged_children + list(child)
                done = False
            else:
                merged_children = merged_children + [child]
        tree = nltk.tree.Tree(tree.node, merged_children)
        merged_children = []
    #print '======',tree

    # And recurse
    children = []
    for child in tree:
        children.append(_merge_children(child, tags))

    #return tree
    return nltk.tree.Tree(tree.node, children)
Example 15
    def parse_specification(text):
        # type: (str) -> Dict
        '''
        Parse a string for a specification.

        Args:
            text (str): String to be parsed.

        Raises:
            ParseException: If specification is not found.

        Returns:
            dict: Dictionary with "specification" key.
        '''
        try:
            return AssetNameParser\
                ._get_specification_parser()\
                .parseString(text)[0].asDict()
        except ParseException:
            msg = f'Specification not found in "{text}".'
            raise ParseException(msg)
Example 16
    def add_sections_info(self, sections_info_file):
        first_line = sections_info_file.readline()

        archive_path = (
            Literal("In archive").suppress() + White().suppress() +
            # trim the colon and line ending characters from archive_path
            restOfLine.setResultsName("archive_path").setParseAction(
                lambda s, loc, toks: s.rstrip(":\n\r ")))
        parser = archive_path

        results = None

        try:
            results = parser.parseString(first_line)
        except ParseException as p:
            raise ParseException("File " + sections_info_file.name +
                                 " is not a valid sections info file. " +
                                 p.msg)

        archive = os.path.basename(results.archive_path)
        self.sections[archive] = SectionsInfo.__info(sections_info_file.name,
                                                     sections_info_file.read())
Example 17
def _merge_children(tree, tags):
    """
    nltk, by documentation, cannot do arbitrary length groups.
    Instead of: (group 1 2 3 4)
    It has to handle this recursively: (group 1 (group 2 (group 3 (group 4))))
    We do the cleanup of converting from the latter to the former.
    """
    if tree is None:
        # There was a problem--shouldn't have empty trees (NOTE: see this with input e.g. 'H2O(', or 'Xe+').
        raise ParseException("Shouldn't have empty trees")

    if isinstance(tree, str):
        return tree

    merged_children = []
    done = False

    # Merge current tag
    while not done:
        done = True
        for child in tree:
            if (isinstance(child, nltk.tree.Tree) and child.label() == tree.label()
                    and tree.label() in tags):
                merged_children = merged_children + list(child)
                done = False
            else:
                merged_children = merged_children + [child]
        tree = nltk.tree.Tree(tree.label(), merged_children)
        merged_children = []

    # And recurse
    children = []
    for child in tree:
        children.append(_merge_children(child, tags))

    return nltk.tree.Tree(tree.label(), children)
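A tiny illustration of the flattening this performs (assuming nltk is installed; the tree below is made up):

import nltk

nested = nltk.tree.Tree('group', ['1', nltk.tree.Tree('group', ['2', nltk.tree.Tree('group', ['3', '4'])])])
print(_merge_children(nested, {'S', 'group'}))  # (group 1 2 3 4)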
Example 18
        def _expand_search(self, k, v):
            if 'search' not in k:
                return None

            model, relation = None, None
            if k == 'search':
                model = get_model('host')
            elif k.endswith('__search'):
                relation = k.split('__')[0]
                try:
                    model = get_model(relation)
                except LookupError:
                    raise ParseException('No related field named %s' % relation)

            search_kwargs = {}
            if model is not None:
                search_fields = get_search_fields(model)
                for field in search_fields:
                    if relation is not None:
                        k = '{0}__{1}'.format(relation, field)
                    else:
                        k = field
                    search_kwargs[k] = v
            return search_kwargs
Example 19
def no_keywords_allowed(s, l, t):
    wd = t[0]
    if wd in pythonKeywords:
        errmsg = "cannot not use keyword '%s' " \
                                "as an identifier" % wd
        raise ParseException(s, l, errmsg)
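A hedged wiring sketch for the validator above (assuming `pythonKeywords` is built from `keyword.kwlist`; the identifier pattern is an assumption):

import keyword
from pyparsing import Word, alphas, alphanums

pythonKeywords = set(keyword.kwlist)
identifier = Word(alphas + "_", alphanums + "_").setParseAction(no_keywords_allowed)

print(identifier.parseString("my_var")[0])  # my_var
# identifier.parseString("class")           # raises ParseException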
Example 20
    def __init__(self, t):
        if t[0] not in ['unit', 'global']:
            raise ParseException(
                f'Only `unit` and `global` aggregation types are supported but `{t[0]}` received.'
            )
        self.agg_type = t[0]
Example 21
    def hasMandatoryKeywords(self):
        for mandatoryKeyword in self.mandatoryKeywords:
            if mandatoryKeyword not in self.buffers["foundKeywords"]:
                raise ParseException("File is missing required keyword: %s" %
                                     mandatoryKeyword)
Example 22
def _check_n_tokens(tokens, n_tokens, name):
    if not len(tokens) == n_tokens:
        err = "{} take {} values. You gave {}"
        err = err.format(name, n_tokens, len(tokens))
        raise ParseException(err)
Example 23
    def isValidKeyword(self, tokens):
        keyword = tokens[0]
        if keyword in self.possibleKeywords:
            return keyword
        else:
            raise ParseException("Invalid keyword")
Example 24
def convertToFloat(s, loc, toks):
    try:
        return float(toks[0])
    except ValueError:
        raise ParseException(s, loc, "invalid float format %s" % toks[0])
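A minimal usage sketch (the numeric pattern is an assumption): with the parse action attached, numeric tokens come back as Python floats rather than strings.

from pyparsing import Regex

number = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?").setParseAction(convertToFloat)

print(number.parseString("3.5")[0] * 2)  # 7.0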
Example 25
# stateMachine.py
Example 26
    def parseString(self, obj):
        raise ParseException(None, None, None)
Example 27
    def __repr__(self):
        # print([t for t in self.tokens.items()])
        if 'singleterm' in self.tokens:
            tokens_fieldname = self.tokens.fieldname.replace('_.', 'attributes.')
            if self.tokens.fieldname == '_exists_':
                return '{{ "attributes.{}": {{ "$exists": true }} }}'.format(self.tokens.singleterm)
            else:
                if self.tokens.field[0] == '__default_field__':
                    return '{{ "{}": {{ "{}": "{}" }} }}'.format('__default_field__', '__default_operator__', self.tokens.singleterm)
                else:
                    return '{{ "{}": {{ "$regex": "{}" }} }}'.format(tokens_fieldname, self.tokens.singleterm)
        if 'phrase' in self.tokens:
            if self.tokens.field[0] == '__default_field__':
                return '{{ "{}": {{ "{}": "{}" }} }}'.format('__default_field__', '__default_operator__', self.tokens.phrase)
            else:
                return '{{ "{}": {{ "$regex": "{}" }} }}'.format(self.tokens.field[0], self.tokens.phrase)
        if 'wildcard' in self.tokens:
            return '{{ "{}": {{ "$regex": "\\\\b{}\\\\b" }} }}'.format(self.tokens.field[0], self.tokens.wildcard)
        if 'regex' in self.tokens:
            return '{{ "{}": {{ "$regex": "{}" }} }}'.format(self.tokens.field[0], self.tokens.regex)

        def range_term(field, operator, range):
            if field in ['duplicateCount', 'timeout']:
                range = int(range)
            else:
                range = '"{}"'.format(range)
            return '{{ "{}": {{ "{}": {} }} }}'.format(field, operator, range)

        if 'range' in self.tokens:
            if self.tokens.range[0].lowerbound == '*':
                lower_term = '{}'
            else:
                lower_term = range_term(
                    self.tokens.field[0],
                    '$gte' if 'inclusive' in self.tokens.range[0] else '$gt',
                    self.tokens.range[0].lowerbound
                )
            if self.tokens.range[2].upperbound == '*':
                upper_term = '{}'
            else:
                upper_term = range_term(
                    self.tokens.field[0],
                    '$lte' if 'inclusive' in self.tokens.range[2] else '$lt',
                    self.tokens.range[2].upperbound
                )
            return '{{ "$and": [ {}, {} ] }}'.format(lower_term, upper_term)
        if 'onesidedrange' in self.tokens:
            return range_term(
                self.tokens.field[0],
                self.tokens.onesidedrange.op,
                self.tokens.onesidedrange.bound
            )
        if 'subquery' in self.tokens:
            tokens_field0 = self.tokens.field[0].replace('_.', 'attributes.')
            if tokens_field0 != '__default_field__':
                return '{}'.format(self.tokens.subquery[0])\
                    .replace('__default_field__', tokens_field0)\
                    .replace('__default_operator__', '$regex')
            else:
                return '{}'.format(self.tokens.subquery[0])

        raise ParseException('Search term did not match query syntax: %s' % self.tokens)
Example 28
    def __init__(self, pstr, loc, msg, elem):
        self.exc = ParseException(pstr, loc, msg, elem)
Example 29
    def pa(s, l, t):
        if not startloc <= col(l, s) <= endloc:
            raise ParseException(s, l, "text not in expected columns")
Example 30
    def __repr__(self):
        # print([t for t in self.tokens.items()])
        if 'singleterm' in self.tokens:
            tokens_fieldname = self.tokens.fieldname.replace('_.', 'attributes.')
            if self.tokens.fieldname == '_exists_':
                return f'{{"attributes.{self.tokens.singleterm}": {{"$exists": true}}}}'
            else:
                if self.tokens.field[0] == '__default_field__':
                    return f"{{\"__default_field__\": {{\"__default_operator__\": \"{self.tokens.singleterm}\", \"$options\": \"i\"}}}}"
                else:
                    return f'{{"{tokens_fieldname}": {{"$regex": "{self.tokens.singleterm}", "$options": "i"}}}}'
        if 'phrase' in self.tokens:
            tokens_field0 = self.tokens.field[0].replace('_.', 'attributes.')
            if tokens_field0 == '__default_field__':
                return f"{{\"__default_field__\": {{\"__default_operator__\": \"{self.tokens.phrase}\", \"$options\": \"i\"}}}}"
            else:
                return f'{{"{tokens_field0}": {{"$regex": "\\\\b{self.tokens.phrase}\\\\b", "$options": "i"}}}}'
        if 'wildcard' in self.tokens:
            return f'{{"{self.tokens.field[0]}": {{"$regex": "\\\\b{self.tokens.wildcard}\\\\b", "$options": "i"}}}}'
        if 'regex' in self.tokens:
            return f'{{"{self.tokens.field[0]}": {{"$regex": "{self.tokens.regex}", "$options": "i"}}}}'

        def range_term(field, operator, range):
            if field in ['duplicateCount', 'timeout']:
                range = int(range)
            else:
                range = f'"{range}"'
            return f'{{"{field}": {{"{operator}": {range}}}}}'

        if 'range' in self.tokens:
            if self.tokens.range[0].lowerbound == '*':
                lower_term = '{}'
            else:
                lower_term = range_term(
                    self.tokens.field[0],
                    '$gte' if 'inclusive' in self.tokens.range[0] else '$gt',
                    self.tokens.range[0].lowerbound
                )
            if self.tokens.range[2].upperbound == '*':
                upper_term = '{}'
            else:
                upper_term = range_term(
                    self.tokens.field[0],
                    '$lte' if 'inclusive' in self.tokens.range[2] else '$lt',
                    self.tokens.range[2].upperbound
                )
            return f'{{"$and": [{lower_term}, {upper_term}]}}'
        if 'onesidedrange' in self.tokens:
            return range_term(
                self.tokens.field[0],
                self.tokens.onesidedrange.op,
                self.tokens.onesidedrange.bound
            )
        if 'subquery' in self.tokens:
            tokens_field0 = self.tokens.field[0].replace('_.', 'attributes.')
            if tokens_field0 != '__default_field__':
                return f'{self.tokens.subquery[0]}'\
                    .replace('__default_field__', tokens_field0)\
                    .replace('__default_operator__', '$regex')
            else:
                return f'{self.tokens.subquery[0]}'

        raise ParseException(f'Search term did not match query syntax: {self.tokens}')