예제 #1
0
def search_reporter():
	"""Search reporters whose names contain any of the requested fragments.

	Reads ``name`` from the JSON request body, splits it on whitespace and
	matches every reporter name against the alternation of those fragments.

	Returns:
		A JSON string. On success: ``errcode`` 0 and ``reporters`` sorted by
		descending ``newsCount``. When the name is empty, contains a filtered
		character, or the regex cannot be compiled: ``errcode`` 0, an empty
		``reporters`` list and the error repr. On any other exception:
		``errcode`` -1 and the error repr.
	"""
	# Stays None if NewsDB() itself fails, so the cleanup below can tell
	# whether there is anything to close.
	newsDB = None
	try:
		newsDB = NewsDB()
		reqData = request.json
		name = str_param('name', reqData.get("name")).strip()  # strip surrounding whitespace

		# Reject regex metacharacters so the user input is matched literally.
		for char in r'.+*?^$|-#><=(){}[]\\':
			if char in name:
				raise re.error('include illegal char')

		if name == '':
			raise re.error('no name')

		regex = re.compile("|".join(name.split()))  # join the name fragments with "|"
		rpts = [rpt for rpt in rptDB.get_names() if regex.search(rpt) is not None]
		rptsInfo = [rptDB.get_rpt(rpt,keys=("name","avatar","news")) for rpt in rpts]

		# Replace the news list with its length before returning it.
		for rpt in rptsInfo:
			rpt["newsCount"] = len(rpt.pop("news"))

		rptsInfo.sort(key=lambda rpt: rpt["newsCount"], reverse=True)

	except re.error as err:
		jsonPack = {"errcode": 0, "reporters": [], "error": repr(err)}
	except Exception as err:
		# The previous version re-raised here, but the `return` inside
		# `finally` discarded that exception; reporting the failure in the
		# payload is the behaviour callers actually observed.
		jsonPack = {"errcode": -1, "error": repr(err)}
	else:
		jsonPack = {"errcode": 0, "reporters": rptsInfo}
	finally:
		if newsDB is not None:
			newsDB.close()
	return json.dumps(jsonPack)
def example_21():
	"""
	Exceptions in regular expressions.

	Tries two invalid patterns and prints what the raised exception exposes,
	then builds (without raising) an error object by hand.
	"""
	pfi()

	# First pattern has an unbalanced ")"; the second refers to group 2,
	# which does not exist.
	for broken_pattern in (r'\w+)', r'(\w+)\2'):
		try:
			re.match(broken_pattern, 'Text to test')
		except Exception as exc:
			exc_class = exc.__class__
			print('\nException class =', exc_class, '\nError text =', exc)
			print('Exception bases =', exc_class.__bases__)
			if sys.version_info < (3,5,0):
				continue
			print('msg =', exc.msg)
			print('pattern =', exc.pattern)
			print('pos =', exc.pos)
			print('lineno =', exc.lineno)
			print('colno =', exc.colno)

	re.error('Error message', 'abc', 17)
예제 #3
0
    def format_field(self, value, format_spec):
        """Format the value using the format_spec.

        The aim of this function is to remove the delegation to __format__() on
        the object. For our needs we do not want the object to be responsible
        for its own formatting since it is not aware of the renderer itself.

        A rekall.obj.BaseObject instance must support the following
        formatting operations:

        __unicode__
        __str__
        __repr__
        and may also support __int__ (for formatting in hex).

        Args:
            value: the object to format.
            format_spec: the format specifier; must match
                self.standard_format_specifier_re.

        Returns:
            format(value, format_spec) applied to the output of the
            format_type_<type> method, or str() of that output when format()
            raises ValueError.

        Raises:
            re.error: if format_spec does not match the specifier regex, or
                this object has no format_type_<type> method.
        """
        m = self.standard_format_specifier_re.match(format_spec)
        if not m:
            raise re.error("Invalid regex")

        fields = m.groupdict()

        # Format the value according to the basic type ("s" when unspecified).
        type_name = fields["type"] or "s"

        # Look the formatter up separately from calling it, so that an
        # AttributeError raised *inside* a formatter is not misreported as a
        # missing formatter.
        formatter = getattr(self, "format_type_%s" % type_name, None)
        if formatter is None:
            raise re.error("No formatter for type %s" % type_name)
        value = formatter(value, fields)

        try:
            return format(value, format_spec)
        except ValueError:
            # The spec is not valid for the converted value; fall back to str.
            return str(value)
예제 #4
0
def update_creatures(creatures, which):
    """Parse one database and merge its data into the creatures dict.

    Args:
        creatures: dict of creature dicts, updated in place.
        which: name of the database to parse; selects the regex from
            ``regexes`` and the destination key from ``name2key``.

    Raises:
        re.error: if the selected regex has no ``dbid`` group or fewer than
            two groups.
    """
    pattern = regexes[which]
    target_key = name2key[which]

    if "dbid" not in pattern.groupindex:
        raise re.error("regex must match dbid group", pattern)
    if pattern.groups < 2:
        raise re.error("regex must match something in addition to dbid", pattern)

    found = re.finditer(pattern, get_db_contents(which))

    if which == "Lore":
        # Here the captured dbid is used directly as the creatures key.
        for hit in found:
            creatures[hit["dbid"]][target_key] = hit["lore"]
        return

    # With exactly two groups store the single remaining value; with more,
    # store the whole dict of remaining named groups.
    single_value = pattern.groups == 2
    by_dbid = {}
    for hit in found:
        named = hit.groupdict()
        dbid = named.pop("dbid")
        by_dbid[dbid] = named.popitem()[1] if single_value else named

    for creature in creatures.values():
        # Creatures without an entry in this database get None.
        creature[target_key] = by_dbid.get(creature["dbid"])
예제 #5
0
파일: lec.py 프로젝트: ShaneDrury/pyon-qed
 def get_from_file(self, fp):
     """Parse one LEC file into a dict of numeric fields.

     The configuration number comes from the file's base name; every other
     field is a float captured from the file contents by
     LEC_COMPILED_REGEX.

     Raises:
         re.error: if the file name or the file contents do not match.
     """
     raw_data = fp.read()
     name_match = re.match(PION_LEC_REGEX['filename'], os.path.basename(fp.name))
     if not name_match:
         raise re.error("Cannot match filename")
     config_number = int(name_match.group('config_number'))

     body_match = re.match(LEC_COMPILED_REGEX, raw_data)
     if not body_match:
         raise re.error("Cannot match file")

     # (output key, regex group name) pairs; only m_res differs from its group.
     float_fields = (
         ('LS', 'LS'),
         ('B0', 'B0'),
         ('F0', 'F0'),
         ('L64', 'L64'),
         ('L85', 'L85'),
         ('L4', 'L4'),
         ('L5', 'L5'),
         ('m_res', 'MRES'),
         ('miu', 'miu'),
     )
     dic = {'config_number': config_number}
     for key, group_name in float_fields:
         dic[key] = float(body_match.group(group_name))
     return dic
예제 #6
0
    def format_field(self, value, format_spec):
        """Format the value using the format_spec.

        The aim of this function is to remove the delegation to __format__() on
        the object. For our needs we do not want the object to be responsible
        for its own formatting since it is not aware of the renderer itself.

        A rekall.obj.BaseObject instance must support the following
        formatting operations:

        __unicode__
        __str__
        __repr__
        and may also support __int__ (for formatting in hex).

        Args:
            value: the object to format.
            format_spec: the format specifier; must match
                self.standard_format_specifier_re.

        Returns:
            format(value, format_spec) applied to the output of the
            format_type_<type> method, or str() of that output when format()
            raises ValueError.

        Raises:
            re.error: if format_spec does not match the specifier regex, or
                this object has no format_type_<type> method.
        """
        m = self.standard_format_specifier_re.match(format_spec)
        if not m:
            raise re.error("Invalid regex")

        fields = m.groupdict()

        # Format the value according to the basic type ("s" when unspecified).
        type_name = fields["type"] or "s"

        # Resolve the formatter first and call it outside any except clause:
        # an AttributeError raised by the formatter itself must not be
        # reported as "No formatter".
        formatter = getattr(self, "format_type_%s" % type_name, None)
        if formatter is None:
            raise re.error("No formatter for type %s" % type_name)
        value = formatter(value, fields)

        try:
            return format(value, format_spec)
        except ValueError:
            # The spec is not valid for the converted value; fall back to str.
            return str(value)
예제 #7
0
 def storeAttrVarPair(varName, attrName):
     """Record that attrName is bound to varName, rejecting duplicates.

     Appends [attrName, varName] to varList and indexes the pair in attrSet
     (attribute -> variable) and varSet (variable -> 1-based position).

     Raises:
         re.error: if attrName or varName has already been registered.
     """
     if attrName in attrSet:
         raise re.error(''.join([
             'reassigment of attribute ', attrName,
             ' to var ', varName,
             '; was var ', attrSet[attrName],
         ]))
     if varName in varSet:
         raise re.error(''.join([
             'redefinition of group name ', varName,
             ' as group ', str(len(varList) + 1),
             '; was group ', str(varSet[varName]),
         ]))
     varList.append([attrName, varName])
     attrSet[attrName] = varName
     varSet[varName] = len(varList)
예제 #8
0
 def __init__(self, regex_pattern, input_files):
     """Compile regex_pattern and remember the files to search.

     Args:
         regex_pattern: pattern source passed to re.compile.
         input_files: stored unchanged on the instance.

     Raises:
         re.error: if regex_pattern is not a valid regular expression; the
             original compile error is attached as the cause.
     """
     # Only the compile step can raise re.error, so keep the try minimal.
     try:
         self.regex = re.compile(regex_pattern)
     except re.error as err:
         raise re.error("regular expression is not valid") from err
     self.input_files = input_files
     self.search_results = {}
예제 #9
0
    def search_by_regex(self):
        """Prompt for a regex until it matches at least one task, then print the matches.

        A task matches when the pattern is found in its 'Task name', 'Date',
        'Duration' or 'Notes' value. An invalid pattern or an empty result
        re-prompts.
        """
        clear_screen()
        tasks = self.fetch_tasks()
        searched_keys = ('Task name', 'Date', 'Duration', 'Notes')

        while True:
            raw_pattern = input("This will search regular expression. "
                                "Please enter:   ")
            try:
                pattern = re.compile(raw_pattern)
            # Catch the exception *class*; `re.error()` would try to build an
            # instance without its required message and raise TypeError.
            except re.error:
                print("Not a valid regular expression. Try again.")
                continue

            results = [
                task for task in tasks
                if any(pattern.search(task[key]) for key in searched_keys)
            ]
            clear_screen()
            if not results:
                print("Sorry. No matches. Please try again.")
                continue
            self.print_tasks(results)
            break
예제 #10
0
def shlex_argparse(argstr):
    """Split argstr shell-style into positional and optional arguments.

    Example::

       parse p1 p2 'p3 three' --o1=one '--o2=two' --o3='3 three' p'4 four' --o4 four Four

       as args = ['p1', 'p2', 'p3 three', 'p4 four']
       and kwargs = {'o1':'one', 'o2':'two', 'o3':'3 three', 'o4':['four', 'Four']}

    A bare ``--`` token is dropped. The values following a ``--key`` token
    are collected by ``_until_dh``.

    Raises:
        re.error: if a token matches none of the recognised forms (for
            example an empty token).
    """
    tokens = shlex.split(argstr)
    classifier = re.compile(r"^(?:(?:--(\w+)=([\s\S]*))|(?:--(\w+))|(\S[\s\S]*))$")

    positionals, options = [], {}
    cursor = 0
    while cursor < len(tokens):
        found = classifier.match(tokens[cursor])
        if found is None:
            raise re.error("Not matched", pattern=classifier)
        opt_key, opt_value, multi_key, positional = found.groups()
        cursor += 1  # step past the token just classified
        if positional:
            if positional != "--":
                positionals.append(positional)
        elif opt_key:
            options[opt_key] = opt_value
        elif multi_key:
            # _until_dh also reports how many following tokens it consumed.
            options[multi_key], consumed = _until_dh(tokens[cursor:])
            cursor += consumed

    return positionals, options
예제 #11
0
 def parse(self):
     """Read the file at self.path and record every transition it lists.

     Each line is tried against the conditional pattern first, then the
     unconditional one. The resulting transition is appended to the
     corresponding list and its target is added to self.states. Afterwards
     self.begin_state is set to the smallest state.

     Raises:
         IOError: if the file is empty.
         re.error: if a line matches neither pattern.
     """
     if os.stat(self.path).st_size == 0:
         raise IOError("File is empty.")
     with open(self.path, "r") as handle:
         for line in handle:
             conditional = self.cond_trans_pattern.search(line)
             unconditional = self.uncond_trans_pattern.search(line)
             if line == "":
                 break
             if conditional:
                 # Group order: source, params, condition, labels, target.
                 origin, params, condition, output_range, destination = \
                     conditional.groups()
                 transition = Conditional(source=origin,
                                          target=destination,
                                          cond_expr=condition,
                                          labels=output_range,
                                          params=params)
                 self.states.add(transition.target)
                 self.conditional_transitions.append(transition)
                 continue
             if unconditional:
                 origin, destination = unconditional.groups()
                 transition = UnConditional(source=origin,
                                            target=destination)
                 self.states.add(transition.target)
                 self.unconditional_transitions.append(transition)
                 continue
             raise re.error("{} didn't match.".format(line))
     self.begin_state = sorted(self.states)[0]
 def get_resampler(self,
                   resampler_type=ResamplerEnum.SMOTE.name,
                   sampling_strategy='minority',
                   k_neighbors=3,
                   allow_minority=True,
                   tomek_sampling_strategy='majority'):
     """Build the resampler selected by resampler_type.

     Args:
         resampler_type: a ResamplerEnum member, or a member's name as a
             string (the default is the string form).
         sampling_strategy: passed to SMOTE and RandomUnderSampler.
         k_neighbors: passed to SMOTE.
         allow_minority: passed to AllKNN.
         tomek_sampling_strategy: passed to TomekLinks.

     Returns:
         The configured resampler instance.

     Raises:
         error: if resampler_type names none of the handled resamplers.
     """
     # The default argument is a plain string, while the comparisons used to
     # read `resampler_type.name` unconditionally and therefore failed with
     # AttributeError whenever the default was used. Accept both forms.
     requested = getattr(resampler_type, 'name', resampler_type)

     smote = SMOTE(random_state=self.random_state,
                   n_jobs=self.n_jobs,
                   sampling_strategy=sampling_strategy,
                   k_neighbors=k_neighbors)
     if requested == ResamplerEnum.SMOTE.name:
         return smote
     if requested == ResamplerEnum.AllKNN.name:
         return AllKNN(allow_minority=allow_minority,
                       n_jobs=self.n_jobs)
     if requested == ResamplerEnum.SMOTETomek.name:
         tomekLinks = TomekLinks(n_jobs=self.n_jobs,
                                 sampling_strategy=tomek_sampling_strategy)
         return SMOTETomek(random_state=self.random_state,
                           n_jobs=self.n_jobs,
                           smote=smote,
                           tomek=tomekLinks)
     if requested == ResamplerEnum.RandomUnderSampler.name:
         return RandomUnderSampler(random_state=self.random_state,
                                   sampling_strategy=sampling_strategy)
     if requested == ResamplerEnum.SMOTEENN.name:
         return SMOTEENN(random_state=self.random_state,
                         n_jobs=self.n_jobs,
                         smote=smote)
     raise error('Theres is no resampler configured')
예제 #13
0
def match_groups(regex, target):
    """Return the groups captured by matching regex at the start of target.

    Raises:
        re.error: if regex does not match; the regex is attached as the
            error's pattern.
    """
    found = re.match(regex, target)
    if found is not None:
        return found.groups()
    message = f"Regex does not match “{target}”. RE Pattern: {regex}"
    raise re.error(message, regex)
예제 #14
0
def test_sphinx_raises_with_prefix_one(self):
    """This is a sphinx docstring.

    :raises ~re.error: Sometimes
    """
    # NOTE(review): looks like a docstring-checker fixture — the "~"-prefixed
    # ":raises" entry above is presumably compared against the raise below;
    # confirm before editing either.
    import re

    raise re.error("hi")
예제 #15
0
파일: loader.py 프로젝트: cnxtech/picu
 def compile(cls, pattern, flags=0):
     """Open an ICU regex for pattern and wrap it in _ICURegex.

     Raises:
         re.error: carrying the text of any PICUException raised while
             opening or wrapping the regex.
     """
     # TODO: pass in UParseError argument to get better diagnostics
     open_args = str_to_uchar_array_with_len(pattern) + (flags, None)
     try:
         return _ICURegex(icu.uregex_open(*open_args))
     except PICUException as failure:
         raise re.error(str(failure))
예제 #16
0
def test_find_sphinx_attr_raises_exact_exc(self):
    """This is a sphinx docstring.

    :raises re.error: Sometimes
    """
    # NOTE(review): looks like a docstring-checker fixture — the documented
    # "re.error" matches the qualified name raised below; confirm before
    # editing either.
    import re

    raise re.error("hi")
예제 #17
0
def test_find_valid_missing_sphinx_attr_raises(self):  # [missing-raises-doc]
    """This is a sphinx docstring.

    :raises re.anothererror: Sometimes
    """
    # NOTE(review): the docstring documents "re.anothererror" while the code
    # raises re.error; together with the marker on the def line this mismatch
    # looks intentional (fixture for a missing-raises check) — confirm.
    from re import error

    raise error("hi")
예제 #18
0
def test_find_sphinx_attr_raises_substr_exc(self):
    """This is a sphinx docstring.

    :raises re.error: Sometimes
    """
    # NOTE(review): looks like a docstring-checker fixture — the docstring
    # uses the qualified "re.error" while the code raises the bare imported
    # name; confirm before changing the import style.
    from re import error

    raise error("hi")
예제 #19
0
 def __get_regexngrams(self, doc: str, pattern: str):
     """Split doc on a regex and return its n-grams joined with '_'.

     Args:
         doc: the text to split.
         pattern: regex source used as the separator.

     Returns:
         A list with one '_'-joined string per window of self.n consecutive
         pieces; empty when there are fewer than self.n pieces.

     Raises:
         re.error: if pattern is not a valid regular expression.
     """
     try:
         splitter = re.compile(pattern)
     except re.error:
         print(f"Error with regex '{pattern}'")
         # Re-raise the original error: wrapping the exception object in a
         # new re.error would store it as `msg` and drop `pattern`/`pos`.
         raise
     pieces = splitter.split(doc)
     width = self.n
     return ['_'.join(pieces[start:start + width])
             for start in range(len(pieces) - width + 1)]
예제 #20
0
파일: task.py 프로젝트: ChsHub/cut_videos
    def _get_video_fps(self, file):
        """Return the frame rate reported by the fps probe command for file.

        The probe output is expected to be a fraction such as "a/b"; the
        result is a / b. Empty output yields 1.

        Raises:
            NotImplementedError: if the output has no '/' or splits into more
                than three parts.
        """
        probe_text = getoutput(fps_command % (ffprobe_path, file))
        if not probe_text:
            return 1  # in case of audio

        probe_text = probe_text.strip()
        if '/' not in probe_text:
            error('UNKNOWN FRAMERATE VALUE %s' % probe_text)
            raise NotImplementedError

        parts = probe_text.split('/')
        part_count = len(parts)
        if part_count == 2:
            return float(parts[0]) / float(parts[1])
        if part_count == 3:
            # Two fractions were printed; use the denominator that ends the
            # first line.
            return float(parts[0]) / float(parts[1].split('\n')[0])

        exception('GET FPS FAIL %s' % str(parts))
        raise NotImplementedError
예제 #21
0
def test_find_valid_missing_google_attr_raises(self):  # [missing-raises-doc]
    """This is a google docstring.

    Raises:
        re.anothererror: Sometimes
    """
    # NOTE(review): the docstring documents "re.anothererror" while the code
    # raises re.error; together with the marker on the def line this mismatch
    # looks intentional (fixture for a missing-raises check) — confirm.
    from re import error

    raise error("hi")
예제 #22
0
 def test_scan_aborts_due_to_invalid_regex(self,
                                           mock_config: mock.MagicMock):
     """scan() must surface a ConfigException when the mocked call raises re.error."""
     self.options.regex = True
     scanner_under_test = TestScanner(self.options)
     compile_failure = re.error(  # type: ignore
         msg="Invalid regular expression", pattern="42")
     mock_config.side_effect = compile_failure
     expected_message = "Invalid regular expression"
     with self.assertRaisesRegex(types.ConfigException, expected_message):
         scanner_under_test.scan()
예제 #23
0
 def get_vectorizer(self,
                    vectorizer_type=VectEnum.TfidfVectorizer,
                    min_df=1,
                    tokenizer_type=None):
     """Build the vectorizer selected by vectorizer_type.

     Both supported vectorizers receive the same min_df and tokenizer.

     Raises:
         error: if vectorizer_type is neither of the handled members.
     """
     shared_options = dict(min_df=min_df, tokenizer=tokenizer_type)
     if vectorizer_type.name == VectEnum.TfidfVectorizer.name:
         return TfidfVectorizer(**shared_options)
     if vectorizer_type.name == VectEnum.CountVectorizer.name:
         return CountVectorizer(**shared_options)
     raise error('Theres is no vect configured')
예제 #24
0
def test_google_raises_with_prefix_one(self):
    """This is a google docstring.

    Raises:
        ~re.error: Sometimes
    """
    # NOTE(review): looks like a docstring-checker fixture — the "~"-prefixed
    # entry in the Raises section is presumably compared against the raise
    # below; confirm before editing either.
    import re

    raise re.error("hi")
예제 #25
0
def test_find_google_attr_raises_exact_exc(self):
    """This is a google docstring.

    Raises:
        re.error: Sometimes
    """
    # NOTE(review): looks like a docstring-checker fixture — the documented
    # "re.error" matches the qualified name raised below; confirm before
    # editing either.
    import re

    raise re.error("hi")
예제 #26
0
def test_find_google_attr_raises_substr_exc(self):
    """This is a google docstring.

    Raises:
        re.error: Sometimes
    """
    # NOTE(review): looks like a docstring-checker fixture — the docstring
    # uses the qualified "re.error" while the code raises the bare imported
    # name; confirm before changing the import style.
    from re import error

    raise error("hi")
예제 #27
0
def test_latency(sockperf_path, host, port):
    """Run a sockperf ping-pong against host:port and return the latency.

    Returns:
        The number (in usec) from the first "sockperf: Summary: Latency is"
        line of the combined stdout/stderr output, as a float.

    Raises:
        re.error: if no output line contains the summary.
    """
    summary = re.compile(r'^sockperf: Summary: Latency is ([0-9.]+) usec')
    command = [sockperf_path, 'ping-pong', '-i', host, '-p', str(port)]
    completed = run(command, stdin=None, stdout=PIPE, stderr=STDOUT)
    searches = (summary.search(line)
                for line in completed.stdout.decode().split('\n'))
    first_hit = next((hit for hit in searches if hit), None)
    if first_hit is None:
        raise re.error("No lines in the stdout of sockperf ping-pong matched the regex")
    return float(first_hit.group(1))
예제 #28
0
 def replacement_func(match_obj):
     """Return the unquoted byte for a matched two-character escape.

     Raises:
         re.error: if the matched sequence is not one of the known escapes.
     """
     quoted = match_obj.group(0)
     # escape sequence -> the byte it stands for
     unquoted_by_sequence = {b"\\n": b"\n", b"\\\\": b"\\"}
     if quoted in unquoted_by_sequence:
         return unquoted_by_sequence[quoted]
     raise re.error(
         "Unknown quoted sequence {qs} found".format(qs=quoted))
예제 #29
0
 def replacement_func(match_obj):
     """Return the quoted form of a matched byte that needs quoting.

     Raises:
         re.error: if the matched byte is not one that should be quoted.
     """
     raw = match_obj.group(0)
     # byte -> its two-character escape
     quoted_by_char = {b"\n": b"\\n", b"\\": b"\\\\"}
     if raw in quoted_by_char:
         return quoted_by_char[raw]
     raise re.error(
         "Bad char '{bc}' shouldn't need quoting".format(bc=raw))
예제 #30
0
 def _parse_line_regex(self, line):
     """Match line against self.regex_comp and record the result.

     Stores the line in self.line_str, the match in self.match_obj, and sets
     self.sync to True exactly when the "sync" group captured "*".

     Raises:
         re.error: if the line does not match the regex.
     """
     self.line_str = line
     # this next line object is non-copyable! be careful when copying this class!
     self.match_obj = self.regex_comp.match(line)
     if self.match_obj is None:
         raise re.error("Failure while parsing line: " + line + " as expression of the form " + self.regex_str + ". Apparently it doesn't comply to the regex provided.")
     self.sync = self.match_obj.group("sync") == "*"
예제 #31
0
 def __init__(self, ref_str: str):
     """Parse a verse reference string into first and (optional) last verse.

     The original string is kept in self.str. Names listed in
     ONE_CHAPTER_BOOKS get " 1" appended before matching.

     Raises:
         re.error: if the (possibly extended) string does not match
             VERSE_REGEX.
     """
     self.str = ref_str
     # make it parse
     parse_target = f"{ref_str} 1" if ref_str in ONE_CHAPTER_BOOKS else ref_str
     parsed = VERSE_REGEX.match(parse_target)
     if parsed is None:
         raise re.error(f"'{parse_target}' does not match regex!")
     book, first, last = parsed.groups()
     self.first = Verse(book.strip(), first)
     if last:
         self.last = Verse(book.strip(), last)
     else:
         self.last = None
예제 #32
0
def test_find_invalid_missing_sphinx_attr_raises(self):
    """This is a sphinx docstring.
    pylint allows this to pass since the comparison between Raises and
    raise are based on the class name, not the qualified name.

    :raises bogusmodule.error: Sometimes
    """
    # The documented module ("bogusmodule") differs from the one imported
    # below; per the docstring, only the class name "error" is compared.
    from re import error

    raise error("hi")
예제 #33
0
 def get_from_file(self, file_name):
     """
     Parse all the data from an Iwasaki Charged Meson file, e.g.
     meson_BOX_RELOADED.src0.ch1-0.3333333333.ch2-0.3333333333.m10.03.m20.03.dat.510

     Charges, masses and the configuration number come from the file name;
     one dict is produced per block matched in the file contents.

     Despite its name, file_name is an open file object (it is read and its
     .name attribute is used).

     Raises:
         re.error: if the file name does not match.
     """
     raw_data = file_name.read()
     fname = os.path.basename(file_name.name)
     name_match = re.match(IWASAKI_REGEX['filename'], fname)
     if not name_match:
         raise re.error("Cannot match filename")
     charge_1 = int(round(3 * float(name_match.group('charge1')), 1))
     charge_2 = int(round(3 * float(name_match.group('charge2')), 1))
     mass_1 = float(name_match.group('mass1'))
     mass_2 = float(name_match.group('mass2'))
     config_number = int(name_match.group('config_number'))

     block_regex = (IWASAKI_COMPILED_REGEX_PSEUDO if self.pseudo
                    else IWASAKI_COMPILED_REGEX)
     blocks = [found.groupdict() for found in block_regex.finditer(raw_data)]

     data = []
     for block in blocks:
         if self.pseudo:
             source = sink = 'GAM_5'
         else:
             source = block['source']
             sink = block['sink']

         re_data, im_data, time_slices = [], [], []
         for row in block['data'].split('\n'):
             try:
                 slice_text, real_text, imag_text = row.split()
                 real_part = float(real_text)
                 imag_part = float(imag_text)
                 time_slice = int(slice_text)
             except ValueError:
                 # Skip rows that are not "<int> <float> <float>".
                 continue
             re_data.append(real_part)
             im_data.append(imag_part)
             time_slices.append(time_slice)

         data.append({'source': source,
                      'sink': sink,
                      'data': re_data,
                      'im_data': im_data,
                      'time_slices': time_slices,
                      'mass_1': mass_1,
                      'mass_2': mass_2,
                      'charge_1': charge_1,
                      'charge_2': charge_2,
                      'config_number': config_number})
     return data
예제 #34
0
 def get_from_file(self, fp):
     """Parse one kaon LEC file into a dict of numeric fields.

     The configuration number comes from the file's base name; M2, A_3 and
     A_4 are floats captured from the contents by LEC_COMPILED_REGEX. The
     resulting dict is logged at debug level.

     Raises:
         re.error: if the file name or the file contents do not match.
     """
     raw_data = fp.read()
     name_match = re.match(KAON_LEC_REGEX['filename'], os.path.basename(fp.name))
     if not name_match:
         raise re.error("Cannot match filename")
     config_number = int(name_match.group('config_number'))

     body_match = re.match(LEC_COMPILED_REGEX, raw_data)
     if not body_match:
         raise re.error("Cannot match file")

     dic = {'config_number': config_number}
     # (output key, regex group name) pairs
     for key, group_name in (('M2', 'M2'), ('A_3', 'A3'), ('A_4', 'A4')):
         dic[key] = float(body_match.group(group_name))
     log.debug(dic)
     return dic
예제 #35
0
    def isMatch(self, task):
        r"""
        Return a truthy value when the supplied task satisfies self.text.

        The result is the match object (or None) from re.search on task.text,
        or True when even the fallback pattern cannot be compiled.

        This filter can handle basic and/or/not conditions. The syntax is as
        follows:

        :AND   :   ',' or whitespace (' ')
        :OR    :   '|'
        :NOT   :   prefixed '~' or '!'

        These operators follow the following order of precedence: OR, AND, NOT.
        So, for example:

        :'work job1 | home':                Either  (matches 'work'
                                                     AND 'job1')
                                            OR      (matches 'home')

        :'norwegian blue ~dead | !parrot':  Either  (matches 'norwegian'
                                                     AND 'blue'
                                                     AND does NOT match 'dead')
                                            OR      (does NOT match 'parrot')

        Since the python re module is used, most of the escaped regex
        characters will also work when attached to one of the (comma- or space-
        delimited) strings. E.g.:
        - \bcleese\b will match 'cleese' but not 'johncleese'
        - 2014-\d\d-07 will match '2014-03-07' but not '2014-ja-07'

        The method can handle parentheses in the search strings. Unlike most
        regex characters, these don't need to be escaped since they are escaped
        automatically. So the search string '(B)' will match '(B) nail its
        feet to the perch'.
        """
        mymatch = False
        comp = re.compile(r'\s*([\!~])?([\(\)\w\\\-]+)[\s,]*', re.U)
        # Pattern.sub's third positional parameter is `count`, not `flags`:
        # passing re.U there capped the number of substitutions at 32. The
        # flag is already part of the compiled pattern.
        restring = comp.sub(simpleTextFilterRepl, self.text)
        try:
            if ')' in restring:
                raise re.error('')  # otherwise adding closing parenth avoids error here
            mymatch = re.search(restring, task.text, re.I | re.U)
        except re.error:
            # Escape literal parentheses and retry.
            comp2 = re.compile(r'\s*\((?=[^?])', re.U)
            restring2 = comp2.sub(r'\\(', restring)
            comp3 = re.compile(r'(?<!\))\)(?=\))', re.U)
            restring3 = comp3.sub(r'\\)', restring2)
            # Temporary solution: when the user's search string still contains
            # illegal characters (+, ?, \), treat the task as matching instead
            # of letting the program crash.
            try:
                mymatch = re.search(restring3, task.text, re.I | re.U)
            except Exception:
                mymatch = True
        return mymatch
예제 #36
0
def parse_iwasaki_32c_charged_meson_file(f):
    """
    Parse an Iwasaki charged-meson file line by line, without the block regex.

    Charges, masses and the configuration number come from the file name.
    Each STARTPROP ... ENDPROP section yields one dict holding its source,
    sink and the three data columns.

    This is just as slow as regex

    Raises:
        re.error: if the file name does not match.
    """
    records = []
    name_match = re.match(IWASAKI_REGEX['filename'], os.path.basename(f.name))
    if not name_match:
        raise re.error("Cannot match filename")
    charge_1 = int(round(3 * float(name_match.group('charge1')), 1))
    charge_2 = int(round(3 * float(name_match.group('charge2')), 1))
    mass_1 = float(name_match.group('mass1'))
    mass_2 = float(name_match.group('mass2'))
    config_number = int(name_match.group('config_number'))

    for raw_line in f:
        stripped = raw_line.strip()
        tokens = stripped.split(' ')
        keyword = tokens[0]
        if stripped == 'STARTPROP':
            # A new section starts: reset the column accumulators.
            re_data, im_data, time_slices = [], [], []
        elif keyword == 'MASSES:':
            continue
        elif keyword == 'SOURCE:':
            source = tokens[1]
        elif keyword == 'SINKS:':
            sink = tokens[1]
        elif stripped == 'ENDPROP':
            records.append({'source': source,
                            'sink': sink,
                            'data': re_data,
                            'im_data': im_data,
                            'time_slices': time_slices,
                            'mass_1': mass_1,
                            'mass_2': mass_2,
                            'charge_1': charge_1,
                            'charge_2': charge_2,
                            'config_number': config_number})
        else:
            # Any other line must be "<time slice> <real> <imaginary>".
            slice_text, real_text, imag_text = stripped.split()
            time_slices.append(int(slice_text))
            re_data.append(float(real_text))
            im_data.append(float(imag_text))
    return records
    def servicesInFolder(self, foldername, namefilter=None):
        """List the services in an admin folder as "<serviceName>.<type>" strings.

        Args:
            foldername: folder appended to "/arcgis/admin/services/".
            namefilter: optional regex; when given, only services whose
                serviceName it is found in (re.search) are returned.

        Returns:
            A list of "<serviceName>.<type>" strings.

        Raises:
            re.error: if namefilter is not a valid regex.
            ServiceException: if the HTTP status is not 200 or the response
                is reported as an error by assertJsonSuccess.
        """
        # test if name filter is valid regex
        name_regex = None
        if namefilter:
            try:
                name_regex = re.compile(namefilter)
            except re.error:
                raise re.error("Specified namefilter argument must be a vaild regex. Aborting.")

        folderURL = "/arcgis/admin/services/" + foldername

        # This request only needs the token and the response formatting parameter
        params = urllib.urlencode({'token': self.token, 'f': 'json'})

        headers = {"Content-type": "application/x-www-form-urlencoded", "Accept": "text/plain"}

        # Connect to URL and post parameters
        httpConn = httplib.HTTPConnection(self.server, self.port)
        try:
            httpConn.request("POST", folderURL, params, headers)

            # Read response
            response = httpConn.getresponse()
            if response.status != 200:
                raise ServiceException("Could not read folder information.")
            data = response.read()
        finally:
            # Close on every path; previously the connection leaked when the
            # payload was an error object or when the request itself failed.
            httpConn.close()

        # Check that data returned is not an error object
        if not assertJsonSuccess(data):
            raise ServiceException("Error when reading folder information. " + str(data))

        # Deserialize response into Python object
        dataObj = json.loads(data)

        listofservices = []
        for item in dataObj['services']:
            # if namefilter, check to see if name matches; if not, skip to next item
            if name_regex is not None and not name_regex.search(item['serviceName']):
                continue
            listofservices.append(item['serviceName'] + "." + item['type'])

        return listofservices
예제 #38
0
def compile(regexPatternIn, flags=0, debug = 0):
    """Compile a pattern that may use the tag syntax written inside "(?#...)".

    A pattern that, once its directive comments are removed, does not start
    with "(?#" and end with ")" is handed unchanged to re.compile. Otherwise
    the sequence of tag delimiters found in it selects how it is compiled.

    Args:
        regexPatternIn: the pattern source.
        flags: passed on to re.compile / ExtCompile / zinwrapper.
        debug: when truthy, return the intermediate pieces
            (regexPattern, flags, patt1, patt2, zone) instead of a compiled
            object.

    Returns:
        A compiled object; or None when fewer than two tag delimiters are
        found, the first one is neither "<" nor "<*<", or the delimiter
        sequence is not one of the recognised tokens.

    Raises:
        re.error: for the parent ("<*<>>") and sibling ("<>*<>") tokens,
            which are not yet implemented.
    """
    BEG         = r'(?#'
    END         = r')'
    # Recognised delimiter sequences (the concatenation of all tags found).
    ZIN         = r'<<>>'
    NXTTAG      = r'<><>'
    CHILDREN    = r'<<>*>'
    PARENT      = r'<*<>>'
    SIBLING     = r'<>*<>'
    TAG         = r'<>'
    # Directive comments such as (?#<PASS ...>) or (?#<SPAN ...>).
    PATDIRECT   = r'\(\?#(<(?:PASS|SPAN|SEARCH|NXTPOSINI).*?>)\)'

    # Collected but not used in this function.
    directives   = re.findall(PATDIRECT, regexPatternIn)  # @UnusedVariable
    regexPattern = re.sub(PATDIRECT, '', regexPatternIn)
    # Split on every tag delimiter to get the text between delimiters.
    patterns = re.split(r'(?:<\*<|>\*<|>\*>|>|<)', regexPattern)
    # Not wrapped in "(?#" ... ")": treat as a plain regular expression.
    if not (patterns[0].startswith(BEG) and patterns[-1].endswith(END)): return re.compile(regexPatternIn, flags)
    # Drop the leading "(?#" piece and the trailing ")" piece.
    patterns = patterns[1:-1]
    
    tags = re.findall(r'[<>][<>*]*', regexPattern)
    if len(tags) <= 1 or tags[0] not in ['<', '<*<']: return None
    token = ''.join(tags)
    # A single "<...>" tag is compiled directly.
    if token == TAG: return ExtCompile(regexPattern, flags)
    # patt1 wraps the first piece; patt2 (built below) wraps the piece that
    # depends on the token.
    patt1 = BEG + '<' + patterns[0] + '>' + END
    zone = 'zin'
    if token == ZIN:
        patt2 = patterns[-1] if tags[-1] == '>' else ''
        patt2 = BEG + '<' + patt2 + '>' + END
        zone = 0 if tags[1] == '<' else patterns[1]
    elif token == CHILDREN:
        # Names written as " .name" inside patt1, joined as alternatives.
        chldpat = '|'.join(re.findall(r'(?<= )[.]([a-z\d_-]+)(?=[ >])', patt1))
        chldpat = chldpat or '__TAG__' 
        patt2 = patterns[-1] if tags[-1] == '>*>' else ''
        patt2 = BEG + '<' + chldpat + ' ' + patt2 + '>' + END
    elif token == NXTTAG:
        patt2 = patterns[-1] if tags[-1] == '>' else ''
        patt2 = BEG + '<__TAG__ ' + patt2 + '>' + END
        zone = 'zoutr'
    elif token in [PARENT, SIBLING]:
        raise re.error(token + ' not yet implemented')
        pass
    else:
        return None

    if debug: return regexPattern, flags, patt1, patt2, zone
    srchRegexObj = ExtCompile(patt2, flags)
    # NOTE(review): patt1 is always a non-empty string here (it is built from
    # non-empty literals), so this guard looks unreachable — confirm.
    if not patt1: return srchRegexObj
    spanRegexObj = ExtCompile(patt1, flags)
    return zinwrapper(regexPattern, flags, spanRegexObj, srchRegexObj, zone)
예제 #39
0
    def raw_match_indexes(self, query, ignore_case=False) -> list:
        """Return the indexes of every message that fully matches query.

        Parameters:
            query: The regex to match against each message's text (element 1
                of each self._convo entry); the whole text must match.
            ignore_case (optional): When True, match case-insensitively.
        Return:
            An ascending list of indexes into self._convo.
        Raises:
            re.error: if query is not a valid regex; the original compile
                error is attached as the cause.
        """
        # python re cheat sheet: https://www.debuggex.com/cheatsheet/regex/python

        flags = re.IGNORECASE if ignore_case else 0
        # Only compilation can raise re.error, so keep the try minimal.
        try:
            matcher = re.compile(query, flags)
        except re.error as err:
            raise re.error("\"{0}\" is not a valid regex string".format(query)) from err
        return [index for index, entry in enumerate(self._convo)
                if matcher.fullmatch(entry[1]) is not None]
예제 #40
0
파일: bootstrap.py 프로젝트: csmall/rnms
    def create_logmatches(self):
        """Create the 'Default' logmatch set with its logfiles, rows and fields.

        Data comes from database_data.logfiles and
        database_data.logmatch_default_rows. The id of every event type used
        is appended to self.used_event_types.

        Raises:
            ValueError: if a row cannot be unpacked, names an unknown event
                type tag, or contains a malformed field.
            re.error: if a row's match text cannot be compiled.
        """
        default_set = model.LogmatchSet(display_name=u'Default')
        model.DBSession.add(default_set)

        for logfile_row in database_data.logfiles:
            logfile = model.Logfile(logfile_row[0], logfile_row[1])
            logfile.logmatchset = default_set
            model.DBSession.add(logfile)

        for row in database_data.logmatch_default_rows:
            match_row = model.LogmatchRow()
            try:
                (match_row.match_text, match_row.match_start,
                    match_row.host_match, match_row.attribute_match,
                    match_row.state_match, event_tag, fields) = row
            except Exception as errmsg:
                raise ValueError(
                    "Cannot add row \"%s\": %s.\n" % (row[0], errmsg))

            match_row.event_type = model.EventType.by_tag(event_tag)
            if match_row.event_type is None:
                raise ValueError(
                    "Bad EventType tag \"{}\" in LogMatchRow {}".
                    format(event_tag, match_row.match_text))
            self.used_event_types.append(match_row.event_type.id)

            try:
                match_row.match_sre = re.compile(row[0])
            except re.error as errmsg:
                raise re.error(
                    "Cannot compile message \"{}\": {}".
                    format(row[0], errmsg))
            match_row.logmatch_set = default_set

            if fields is not None:
                for field in fields:
                    match_field = model.LogmatchField()
                    try:
                        (match_field.event_field_tag,
                            match_field.field_match) = field
                    except ValueError:
                        raise ValueError(
                            "Bad Field \"{}\" in LogMatchRow {}".format(
                                field, match_row.match_text))
                    match_row.fields.append(match_field)
            model.DBSession.add(match_row)
예제 #41
0
 def base(self):
     """Parse a subexpression that can be starred: single letter or group.

     Returns self.epsilon() at the end of input or before ')'. A
     parenthesised group returns whatever self.expression() returns. A
     single (possibly backslash-escaped) character returns
     (states, states, False), where states is a frozenset holding the one
     new state.

     Raises:
         LanguageError: if a group is not closed by ')'.
         re.error: if the expression ends right after a backslash.
     """
     if self.pos == len(self.expr) or self.expr[self.pos] == ')':
         return self.epsilon()

     symbol = self.expr[self.pos]
     if symbol == '(':
         self.pos += 1
         group = self.expression()
         if self.pos == len(self.expr) or self.expr[self.pos] != ')':
             raise LanguageError("Close paren expected at char " + str(self.pos))
         self.pos += 1
         return group

     if symbol == '\\':
         # Escaped character: the symbol is whatever follows the backslash.
         self.pos += 1
         if self.pos == len(self.expr):
             raise re.error("Character expected after backslash")
         symbol = self.expr[self.pos]

     self.alphabet.add(symbol)
     singleton = frozenset([self.newstate(symbol)])
     self.pos += 1
     return singleton, singleton, False
예제 #42
0
파일: re_test.py 프로젝트: mdavid/dlr
def test_sanity_re():
    '''
    Smoke test for the re module: every module member (compile, the flag
    constants, search, match, split, findall, finditer, sub, subn, escape,
    error and purge) is exercised at least once.
    '''
    pat = "(abc){1}"
    head = (0, 3)

    # compile: no flags, flags passed positionally, flags passed by keyword
    Assert(hasattr(re.compile(pat), "pattern"))
    Assert(hasattr(re.compile(pat, re.L), "pattern"))
    Assert(hasattr(re.compile(pat, flags=re.L), "pattern"))

    # Flag constants, short and long spelling:
    # I/IGNORECASE, L/LOCALE, M/MULTILINE, S/DOTALL, U/UNICODE, X/VERBOSE
    for flag_name in ("I", "IGNORECASE",
                      "L", "LOCALE",
                      "M", "MULTILINE",
                      "S", "DOTALL",
                      "U", "UNICODE",
                      "X", "VERBOSE"):
        Assert(hasattr(re, flag_name))

    # search: first over plain strings, then over the same text in a buffer
    for wrap in (str, buffer):
        AreEqual(re.search(pat, wrap("")), None)
        AreEqual(re.search(pat, wrap("abcxyz")).span(), head)
        AreEqual(re.search(pat, wrap("abcxyz"), re.L).span(), head)
        AreEqual(re.search(pat, wrap("abcxyz"), flags=re.L).span(), head)
        AreEqual(re.search(pat, wrap("xyzabc")).span(), (3,6))

    # match
    AreEqual(re.match(pat, ""), None)
    AreEqual(re.match(pat, "abcxyz").span(), head)
    AreEqual(re.match(pat, "abcxyz", re.L).span(), head)
    AreEqual(re.match(pat, "abcxyz", flags=re.L).span(), head)

    # split
    whole = ['', 'abc', 'xyz', 'abc', '']
    AreEqual(re.split(pat, ""), [''])
    AreEqual(re.split(pat, "abcxyz"), ['', 'abc', 'xyz'])
    # split with maxsplit, positional and by keyword
    AreEqual(re.split(pat, "abc", 0), ['', 'abc', ''])
    for limit in xrange(3):
        AreEqual(re.split(pat, "abc", maxsplit=limit), ['', 'abc', ''])
        AreEqual(re.split(pat, "", maxsplit=limit), [''])
        AreEqual(re.split(pat, "abcxyz", maxsplit=limit), ['', 'abc', 'xyz'])
    for limit, pieces in ((0, whole), (1, ['', 'abc', 'xyzabc']), (2, whole)):
        AreEqual(re.split(pat, "abcxyzabc", maxsplit=limit), pieces)

    # findall
    AreEqual(re.findall(pat, ""), [])
    AreEqual(re.findall(pat, "abcxyz"), ['abc'])
    AreEqual(re.findall(pat, "abcxyz", re.L), ['abc'])
    AreEqual(re.findall(pat, "abcxyz", flags=re.L), ['abc'])
    AreEqual(re.findall(pat, "xyzabcabc"), ['abc', 'abc'])

    # finditer
    def matched(found):
        # Collect the full text of every match produced by an iterator.
        return [hit.group() for hit in found]

    AreEqual(matched(re.finditer(pat, "")), [])
    AreEqual(matched(re.finditer(pat, "abcxyz")), ['abc'])
    AreEqual(matched(re.finditer(pat, "abcxyz", re.L)), ['abc'])
    AreEqual(matched(re.finditer(pat, "abcxyz", flags=re.L)), ['abc'])
    AreEqual(matched(re.finditer(pat, "xyzabcabc")), ['abc', 'abc'])
    rex = re.compile("foo")
    for text, bounds in (("this is a foo and a foo bar", (0, 27)),
                         ("", (0, 1)),
                         ("abc", (0, 4)),
                         ("foo foo foo foo foo", (0, 19))):
        for m in rex.finditer(text):
            AreEqual((m.pos, m.endpos), bounds)

    # sub
    AreEqual(re.sub(pat, "9", "abcd"), "9d")
    AreEqual(re.sub(pat, "abcxyz", 'abcd'), "abcxyzd")
    AreEqual(re.sub(pat, "1", "abcd", 0), "1d")
    AreEqual(re.sub(pat, "1", "abcd", count=0), "1d")
    for limit, replaced in ((1, "1dabcd"), (2, "1d1d")):
        AreEqual(re.sub(pat, "1", "abcdabcd", limit), replaced)

    # subn
    AreEqual(re.subn(pat, "9", "abcd"), ("9d", 1))
    AreEqual(re.subn(pat, "abcxyz", 'abcd'), ("abcxyzd",1))
    AreEqual(re.subn(pat, "1", "abcd", 0), ("1d",1))
    AreEqual(re.subn(pat, "1", "abcd", count=0), ("1d",1))
    for limit, outcome in ((1, ("1dabcd",1)), (2, ("1d1d",2))):
        AreEqual(re.subn(pat, "1", "abcdabcd", limit), outcome)

    # escape
    for plain, escaped in (("abc", "abc"),
                           ("", ""),
                           ("_", "\\_"),
                           ("a_c", "a\\_c")):
        AreEqual(re.escape(plain), escaped)

    # error: only checks that the exception type can be instantiated
    re.error()
    re.error("some args")

    # purge
    re.purge()
예제 #43
0
def ExtCompileOLD(regexPattern, flags = 0):
    """
    Compile a pattern that may carry an extended ``(?#<TAG ...>)`` header.

    When ``regexPattern`` has no such header it is compiled with
    ``re.compile``; otherwise the header is tokenised and an
    ``ExtRegexObject`` is built from the tag pattern, the required
    tags/attributes and the list of [attribute, variable] pairs.
    Malformed headers raise ``re.error``.

    Header notation, by example:

    tr : tag being searched for
    td.width : attribute "width" searched for in td
    td.div.class="playlist_thumb" : attribute class of td.div with value playlist_thumb
    td.div.a.img.src=icon : attribute "src" searched for and stored in variable "icon".  td.div.a.img = td..img
    td[2].div[2].*=hist : td[2] = second td tag child of tr, td[2].div[2] = second div tag of td[2], * = comment
    td..a.href=url : td..a  is compact notation for td.div.a
    
    >>> req = compile(r'(?#<TAG tr td.width div.class="playlist_thumb" td.div.a.img.src=icon td[2].div[2].*=hist td..a.href=url>)', 0)
    >>> req.tags.keys()
    ['tr.td[2].div[2]', 'tr.div', 'tr.td', 'tr', 'tr.td.div.a.img', 'tr.td..a']
    >>> req.varList
    [['tr.td.div.a.img.src', 'icon'], ['tr.td[2].div[2].*', 'hist'], ['tr.td..a.href', 'url']]
    
    >>> equis = compile('(?#<TAG ese a.*="http//esto/es/prueba"=icon href=url>)', 0)
    >>> equis.varList
    [['ese.a.*', 'icon'], ['ese.href', 'url']]
    >>> for tag in equis.tags:
    ...    print tag, [(key, equis.tags[tag][key].pattern) for key in equis.tags[tag] if equis.tags[tag][key]]
    ... 
    ese []
    ese.a [('*', 'http//esto/es/prueba\\\\Z')]
    
    >>> equis = compile('(?#<TAG a href span.src=icon span.*=label div.id>)',0)
    >>> equis.tags
    {'a': {'href': ''}, 'a.div': {'id': ''}, 'a.span': {'src': '', '*': ''}}
    >>> equis = compile('(?#<TAG a href span{src=icon *=label} div.id>)',0)
    >>> equis.tags
    {'a': {'href': ''}, 'a.div': {'id': ''}, 'a.span': {'src': '', '*': ''}}
    >>> equis = compile('(?#<TAG a href span{src=icon *=label div.id>}',0)
    Traceback (most recent call last):
      File "C:\Python27\lib\doctest.py", line 1315, in __run
        compileflags, 1) in test.globs
      File "<doctest __main__.compile[10]>", line 1, in <module>
        equis = compile('(?#<TAG a href span(src=icon *=label div.id>)',0)
      File "C:\\Users\\Alex Montes Barrios\\git\\addonDevelopment\\xbmc addon development\\src\\xbmcUI\\CustomRegEx.py", line 534, in compile
        raise re.error(v)
    error: unbalanced parenthesis
    
    """
    # Register that attribute `attrName` is captured into variable `varName`.
    # Each attribute and each variable name may be bound only once; a repeat
    # of either raises re.error.
    def storeAttrVarPair(varName, attrName):
        if attrName in attrSet:
            v = 'reassigment of attribute '+ attrName + ' to var ' + varName + '; was var ' + attrSet[attrName] 
            raise re.error(v)
        if varName in varSet:
            v = 'redefinition of group name '+ varName + ' as group ' + str(len(varList) + 1) + '; was group ' + str(varSet[varName])
            raise re.error(v)
        varList.append([attrName, varName])
        attrSet[attrName] = varName
        varSet[varName] = len(varList)  # 1-based position of the variable in varList
        pass
    
#     match = re.search('\(\?#<(?P<tagpattern>[a-zA-Z][a-zA-Z\d]*)(?P<vars>[^>]*>)\)',regexPattern)
    # Locate the "(?#<TAG vars>)" header; without one this is an ordinary regex.
    match = re.search('\(\?#<(?P<tagpattern>[a-zA-Z]\S*|__TAG__)(?P<vars>[^>]*>)\)',regexPattern)
    if not match: return re.compile(regexPattern, flags)
    
    # Token definitions for the header text.  Each is a named group so that
    # m.lastgroup identifies which kind of token was matched.
    ATTR        = r'(?P<ATTR>(?<=[{ ])\(*[a-zA-Z\d\*\.\[\]_-]+\)*(?==))'
    REQATTR     = r'(?P<REQATTR>(?<=[{ ])\(*[a-zA-Z\d\*\.\[\]_-]+\)*(?=[ >\)]))'
    VAR         = r'(?P<VAR>(?<==)[a-zA-Z][a-zA-Z\d]*(?=[ >}]))'
    STRPVAR     = r'(?P<STRPVAR>(?<==)&[a-zA-Z][a-zA-Z\d]*&(?=[ >}]))'
    PARAM       = r'(?P<PARAM>(?<==)[\'\"][^\'\"]+[\'\"](?=[ >=]))'
    TAGSUFFIX   = r'(?P<TAGSUFFIX>(?<=[ {])[a-zA-Z\d\*\.\[\]]+(?={))'
    OPENP       = r'(?P<OPENP>{)'
    CLOSEP      = r'(?P<CLOSEP>})'
    EQ          = r'(?P<EQ>=)'
    WS          = r'(?P<WS>\s+)'
    END         = r'(?P<END>>)'
    
    tagPattern = match.group('tagpattern')
    # "__TAG__" is a wildcard standing for any tag name.
    if tagPattern == '__TAG__': tagPattern = '[a-zA-Z][^\s>]*'
    rootTag = "tagpholder"
    rootTagStck = [rootTag]         # stack of path prefixes opened with '{'
    master_pat = re.compile('|'.join([TAGSUFFIX, ATTR, REQATTR, VAR, STRPVAR, PARAM, OPENP, CLOSEP, EQ, WS, END]))
    scanner = master_pat.scanner(match.group('vars'))
    totLen = 0                      # number of header characters consumed so far
    varSet={}                       # variable name -> 1-based index in varList
    attrSet = {}                    # attribute path -> variable name
    tags = {rootTag:{}}             # tag path -> {attribute: '' or compiled value pattern}
    varList = []                    # [attribute path, variable name] pairs, in order
    # scanner.match returns None at the first position where no token
    # matches, which ends the loop; leftovers are detected via totLen below.
    for m in iter(scanner.match, None):
        sGroup = m.group()
        totLen += len(sGroup) 
#         print m.lastgroup, m.group()
        if m.lastgroup in ["ATTR", "REQATTR"]:
            # "(name)" marks an attribute captured into an automatically
            # named variable "groupN".
            if sGroup[0] == "(" and sGroup[-1] == ")":
                sGroup = sGroup[1:-1]
                if sGroup[0] == '.' and '.' not in sGroup[1:]:
                    v = 'Included tags not allowed as variables'
                    raise re.error(v)
                varName = "group" + str(1 + len(varList))
                # NOTE(review): ATTRSEP is not defined inside this function;
                # presumably a module-level constant ('.') -- confirm.
                attrName = rootTag + ATTRSEP + sGroup
                storeAttrVarPair(varName, attrName)
                pass
            elif (sGroup[0] == "(" and sGroup[-1] != ")") or (sGroup[0] != "(" and sGroup[-1] == ")"):
                v = 'unbalanced parenthesis'
                raise re.error(v)
            # Split "path.attr" into the tag path and the attribute name.
            pathKey, sep, attrKey = sGroup.rpartition(ATTRSEP)
            if not pathKey and sep:
                # A leading separator (".name") names a required tag, not an
                # attribute; it is stored under a double-separator path.
                pathKey = rootTag + 2*ATTRSEP + attrKey
                attrKey = ''
                tags.setdefault(pathKey, {})
                continue
            pathKey = rootTag + sep + pathKey
            # Normalise ".N" to "[N]" and drop "[1]".
            pathKey = re.sub(r'[.](\d+)(?=[.]*)', r'[\1]', pathKey).replace('[1]', '')
            tags.setdefault(pathKey, {})
            sGroup = ""
            
        if m.lastgroup in ["ATTR", "WS", "EQ", 'END']: continue

        if m.lastgroup == "TAGSUFFIX":
            # Remember the tag preceding '{' so OPENP can push it.
            pathKey = sGroup
            continue

        if m.lastgroup == "OPENP":
            rootTagStck.append(pathKey)
            rootTag = '.'.join(rootTagStck)
            continue

        if m.lastgroup == "CLOSEP": 
            rootTagStck.pop()
            rootTag = '.'.join(rootTagStck)
            continue
        
        if m.lastgroup in ["VAR", "STRPVAR"]:
            # "&name&" (STRPVAR) loses its ampersands and gets a value pattern
            # that ignores surrounding whitespace; a plain VAR gets none.
            varName = sGroup if m.lastgroup == "VAR" else sGroup[1:-1]
            attrName = pathKey + ATTRSEP + attrKey
            storeAttrVarPair(varName, attrName)
            sGroup = "" if m.lastgroup == "VAR" else " \s*([ \S]*?)\s* "
        # Reached for REQATTR, VAR, STRPVAR and PARAM tokens: record the
        # attribute on its tag and, if there is value text, compile it.
        tagDict = tags[pathKey]
        if not attrKey:
            v = 'Included tags not allowed as variables' 
            raise re.error(v)
        if attrKey in tagDict and m.lastgroup != "VAR":
            v = 'reasociation of attribute '+ pathKey + ATTRSEP + attrKey 
            raise re.error(v)
        tagDict.setdefault(attrKey, '')
        # sGroup[1:-1] drops the first and last character (the quotes of a
        # PARAM, or the padding spaces of the STRPVAR pattern above).
        if sGroup: tagDict[attrKey] = re.compile(sGroup[1:-1] + r'\Z')

    # Everything was consumed but a '{' was never closed.
    if totLen == len(match.group('vars')) and len(rootTagStck) > 1:
        v = 'unbalanced parenthesis'
        raise re.error(v)
        
    # The scanner stopped early: report the part it could not tokenise.
    if totLen != len(match.group('vars')): 
        v = 'unable to process pattern from: ' + match.group('vars')[totLen:]
        raise re.error(v)
    
    # A required ".*" tag is turned into a single variable named "group" and
    # cannot be combined with any other variable.
    if 'tagpholder..*' in tags:
        if not varList:
            tags.pop('tagpholder..*')
            varList.append(['tagpholder..*', 'group'])
        else:
            v = 'With required Tag ".*", var are not allowed'
            raise re.error(v)
    
#     print '****'
#     print regexPattern
#     print tags
#     print varList
#     for tag in tags:
#         if not any(tags[tag].values()):continue
#         print tag, dict((key, value.pattern) for key, value in tags[tag].items() if value)
    return ExtRegexObject(regexPattern, flags, tagPattern, tags, varList)
예제 #44
0
def ExtCompile(regexPattern, flags = 0):
    """
    Compile ``regexPattern``, understanding the extended ``(?#<TAG ...>)`` header.

    A pattern without such a header is handed to ``re.compile`` unchanged.
    Otherwise the header is parsed by hand into a tag pattern, a dict of
    required tags/attributes, a list of ``[attribute path, variable name]``
    pairs and a list of exclusion zones, from which an ``ExtRegexObject``
    is built.

    Parameters:
        regexPattern: source text of the pattern.
        flags: flags forwarded to ``re.compile`` or ``ExtRegexObject``.

    Returns:
        A compiled ``re`` pattern object or an ``ExtRegexObject``.

    Raises:
        re.error: for a malformed header (unbalanced parentheses or curly
            braces, repeated assignments, tags stored as variables, ...).
    """

    def skipCharsInSet(strvar, aSet, npos = 0, peek = False):
        # Consume the run of characters at npos that belong to the named set.
        m = cmp_patt__in[aSet].match(strvar,npos)
        res = m.group() if m else ''
        return (res, npos + len(res)) if not peek else res
        
    def getCharsNotInSet(strvar, aSet, npos = 0, peek = False):
        # Consume the run of characters at npos that are outside the named set.
        m = cmp_patt_not[aSet].match(strvar,npos)
        res = m.group() if m else ''
        return (res, npos + len(res)) if not peek else res

    match = re.search(r'\(\?#<\(*(?P<tagpattern>[a-zA-Z]\S*|__TAG__)\)*(?P<vars>[^>]*>)\)',regexPattern)
    if not match: return re.compile(regexPattern, flags)
    
    # Delimiter characters of the header mini-language.
    WHITESPACE = ' \t\n\r\f\v'
    ATTRSEP = '.'
    PATH_MOD = '{'
    PATH_RES = '}'
    END_PAT = '>'
    PARAM_DEL = '\'"'
    EQ = '='
    ATTR_RDEL = PATH_MOD + EQ + END_PAT + WHITESPACE 
    ATTR_LDEL = PARAM_DEL + WHITESPACE
    
    TAG_PATT = END_PAT + WHITESPACE
    ATTR_PATT = PATH_RES + ATTR_RDEL
    PARAM_PATT = PARAM_DEL
    VAR_PATT= EQ + PATH_RES + END_PAT + ATTR_LDEL
    
    
    # Pre-compiled character-class matchers, keyed by the set they stand for.
    cmp_patt_not = {
        'TAG_PATT': re.compile('[^' + TAG_PATT + ']+'),
        'ATTR_PATT': re.compile('[^' + ATTR_PATT + ']+'),
        'PARAM_PATT': re.compile('[^' + PARAM_PATT + ']+'),
        'VAR_PATT': re.compile('[^' + VAR_PATT + ']+'),
    }
    cmp_patt__in = {
        'PATH_RES': re.compile('[' + PATH_RES + ']+'),
        'ATTR_LDEL': re.compile('[' + ATTR_LDEL + ']+'),
        'ATTR_RDEL': re.compile('[' + ATTR_RDEL + ']+'),
        'PARAM_DEL': re.compile('[' + PARAM_DEL + ']+'),
    }
    
    rootTag = "tagpholder"
    rootTagStck = [rootTag]
    # Drop the "(?#" prefix and ")" suffix, leaving "<TAG ...>".
    pattern = regexPattern.strip('(?#)')

    npos = 1
    pmax = len(pattern) - 1
    tags = {rootTag:{}}
    varList = []
    TAG, npos = getCharsNotInSet(pattern, 'TAG_PATT', npos)
    if TAG[0] == '(' and TAG[-1] == ')':
        # "(TAG)" also captures the tag name itself as variable __TAG__.
        tagPattern = TAG[1:-1]
        tags[rootTag]['__TAG__'] = ''
        varList.append([rootTag + ATTRSEP + '__TAG__', '__TAG__'])
    else:
        tagPattern = TAG
    if tagPattern == '__TAG__': tagPattern = r'[a-zA-Z][^\s>]*'
    ATTR = PARAM = VAR = ''
    while 1:
        # Each '}' closes one path prefix opened earlier with '{'.
        sep, npos = skipCharsInSet(pattern, 'PATH_RES', npos)
        while sep:
            if len(rootTagStck) == 1:
                v = 'Closing curly brace without corresponding opening curly brace'
                raise re.error(v)
            rootTagStck.pop()
            rootTag = '.'.join(rootTagStck)
            sep = sep[:-1]
        sep, npos = skipCharsInSet(pattern, 'ATTR_LDEL', npos)
        
        if npos >= pmax: break
        ATTR, npos = getCharsNotInSet(pattern, 'ATTR_PATT', npos)

        PARAM = VAR = ''
        sep, npos = skipCharsInSet(pattern, 'ATTR_RDEL', npos)
        
        if sep == PATH_MOD:
            rootTagStck.append(ATTR)
            rootTag = ATTRSEP.join(rootTagStck)
            continue

        # The length is tested first so that an empty `sep` (attribute
        # directly followed by '}') no longer raises IndexError on sep[0].
        if len(sep) > 1 and sep[0] in PATH_MOD + EQ:
            v = 'the pattern "%s" is not allowed ' % sep
            raise re.error(v)
 
        attrName = ATTR.rstrip(')').lstrip('(')
        if attrName != ATTR:
            # A group needs both an opening and a closing parenthesis.  The
            # former test, len(attrName) - len(ATTR) == 1, could never hold
            # because stripping only shortens the string.
            if ATTR.startswith('(') != ATTR.endswith(')'):
                v = 'unbalanced parenthesis'
                raise re.error(v)
            if attrName[0] == ATTRSEP and ATTRSEP not in attrName[1:]:
                v = 'Required tags not allowed as variables'
                raise re.error(v)
            VAR = "group" + str(1 + len(varList)) 
        pathKey, psep, attrKey = attrName.rpartition(ATTRSEP)
        if not pathKey and psep:
            # ".name" designates a required tag rather than an attribute.
            pathKey = rootTag + 2*ATTRSEP + attrKey
            attrKey = ''
        else:
            pathKey = rootTag + psep + pathKey
        # Normalise ".N" to "[N]" and drop "[1]".
        pathKey = re.sub(r'[.](\d+)(?=[.]*)', r'[\1]', pathKey).replace('[1]', '')
        tags.setdefault(pathKey, {})
        if attrKey:
            tagDict = tags[pathKey]
            tagDict.setdefault(attrKey, '')
            if VAR:
                varName = VAR
                attrName = pathKey + ATTRSEP + attrKey
                varList.append([attrName, varName])
            
        if sep == "=":
            # After '=' come at most one quoted value and one variable name,
            # in either order, optionally chained with another '='.
            nt = n1 = n2 = 0
            while 1:
                nt += 1
                if nt == 3:
                    v = 'Triple asignation not allowed'
                    raise re.error(v)
                sep, npos = skipCharsInSet(pattern, 'PARAM_DEL', npos)
                
                if sep:
                    n1 += 1
                    if n1 == 2:
                        v = 'Double attr value not allowed'
                        raise re.error(v)
                    posfin = npos
                    attr = sep
                    # Accumulate quoted text until a chunk ends in '=' or '>'.
                    while 1:
                        attrInc, posfin = getCharsNotInSet(pattern, 'PARAM_PATT', posfin)
                        if attrInc[-1] in '=>': break
                        attr += attrInc
                        sep, posfin = skipCharsInSet(pattern, 'PARAM_DEL', posfin)
                        attr += sep
                    PARAM = attr[1:-1]
                    
                    if attrKey != '__EZONE__':
                        tagDict[attrKey] = re.compile(PARAM + r'\Z', re.DOTALL)
                    else:
                        tagDict[attrKey] = PARAM
                    
                    if len(PARAM): npos += len(PARAM) + 1
                    if pattern[npos] != '=': break
                    npos += 1
                    continue
                if not sep:
                    n2 += 1
                    if n2 == 2:
                        v = 'Double variable asignation to the same attribute value not allowed'
                        raise re.error(v)
                    VAR, npos = getCharsNotInSet(pattern, 'VAR_PATT', npos)

                    if not attrKey:
                        v = 'Store tags as variables is not allowes' 
                        raise re.error(v)
                    attrName = pathKey + ATTRSEP + attrKey
                    varName = VAR.strip('&')
                    if attrName in map(operator.itemgetter(0), varList):
                        v = 'reassigment of attribute '+ attrName + ' to var ' + varName 
                        raise re.error(v)
                    if varName in map(operator.itemgetter(1), varList):
                        v = 'redefinition of group name '+ varName + ' as group ' + str(len(varList) + 1)
                        raise re.error(v)
                    varList.append([attrName, varName])
                    if VAR != varName:
                        # "&name&": capture the value without surrounding whitespace.
                        tagDict[attrKey] = re.compile(r"\s*([ \S]*?)\s*" + r'\Z')
                    
                    if pattern[npos] != '=': break
                    npos += 1
        
    if len(rootTagStck) > 1:
        v = 'Unbalanced curly braces'
        raise re.error(v)
    
    if 'tagpholder..*' in tags:
        if not varList:
            tags.pop('tagpholder..*')
            varList.append(['tagpholder..*', 'text_tags'])
        else:
            v = 'With required Tag ".*", var are not allowed'
            raise re.error(v)

    # Exclusion zones: taken from the __EZONE__ pseudo-attribute when given,
    # otherwise comments and <script> elements.  pop() replaces the
    # Python 2-only dict.has_key() lookup.
    stags = tags[rootTag].pop('__EZONE__', '[!--|script]')
    incFlag = not stags.startswith('^[')
    stags = stags.strip("[^]").split('|') if stags else []
    ezones = []
    for stag in stags:
        if stag == '!--':
            stag = '<' + stag
            etag = '-->'
        else:
            etag = '</' + stag + '>'
            stag = '<' + stag
        ezones.append([stag, etag, 0, 0])
        
    return ExtRegexObject(regexPattern, flags, tagPattern, tags, varList, (ezones, incFlag))
예제 #45
0
def main():
    """
    Check that the expected function names appear in a recorded callstack.

    Reads one line from stdin, expecting well-formed Babeltrace CLI output
    that contains a callstack context of the domain given on the command
    line, and verifies that every FUNC-NAME argument is found in it.

    Usage: ./prog (--kernel|--user EXE) FUNC-NAMES

    Raises:
        ValueError: fewer than two command-line arguments were given.
        re.error: no callstack context was found in the event line.
        Exception: the domain option is unknown, or an expected function
            name is missing from the recorded callstack.

    On success the process exits with status 0.
    """
    recorded_callstack = set()
    cs_type = None

    if len(sys.argv) <= 2:
        print(sys.argv)
        raise ValueError('USAGE: ./{} (--kernel|--user EXE) FUNC-NAMES'.format(sys.argv[0]))

    # If the `--user` option is passed, save the next argument as the path
    # to the executable
    argc = 1
    executable = None
    domain_option = sys.argv[argc]
    # Exact comparison: a substring test (`in '--kernel'`) would also accept
    # fragments such as '-', '--k' or 'user' as valid options.
    if domain_option == '--kernel':
        rexp = kernel_cs_rexp
        cs_type = 'kernel'
    elif domain_option == '--user':
        rexp = user_cs_rexp
        cs_type = 'user'
        argc += 1
        executable = sys.argv[argc]
    else:
        raise Exception('Unknown domain')

    argc += 1

    # The remaining arguments are the function names expected in the
    # callstack of the current event
    expected_callstack = set(sys.argv[argc:])

    # Read the tested line from STDIN; readline() yields '' on empty input,
    # which then fails the match below instead of crashing re.match on None
    event_line = sys.stdin.readline()

    # Extract the callstack context of the event
    m = re.match(rexp, event_line)

    # If there is no match, exit with error
    if m is None:
        raise re.error('Callstack not found in event line')

    raw_callstack = str(m.group(1))
    if cs_type == 'user':
        recorded_callstack = extract_user_func_names(executable, raw_callstack)
    elif cs_type == 'kernel':
        recorded_callstack = extract_kernel_func_names(raw_callstack)
    else:
        raise Exception('Unknown domain')

    # Verify that all expected functions are present in the callstack
    for e in expected_callstack:
        if e not in recorded_callstack:
            raise Exception('Expected function name not found in recorded callstack')

    sys.exit(0)
예제 #46
0
파일: gtgrep.py 프로젝트: askhl/pyg3t
 def re_compile(pattern, name):
     """Compile ``pattern`` with re.UNICODE plus the enclosing ``flags``.

     ``name`` labels the pattern in the error message.  A compilation
     failure is re-raised as re.error carrying the label, the offending
     pattern and the original error text.
     """
     try:
         compiled = re.compile(pattern, re.UNICODE|flags)
     except re.error as err:
         details = (name, pattern, err)
         raise re.error('bad %s pattern "%s": %s' % details)
     return compiled