Example #1
0
 def natural_keys(text):
     """Sort key that orders strings in human (natural) order.

     Usage: alist.sort(key=natural_keys)
     http://nedbatchelder.com/blog/200712/human_sorting.html
     (See Toothy's implementation in the comments)
     """
     keys = []
     # Capturing group keeps the digit runs so numbers sort numerically.
     for chunk in re_split(r"(\d+)", text):
         keys.append(atoi(chunk))
     return keys
Example #2
0
def tokenize(text):
    """Split *text* into title-cased tokens, attaching Portuguese
    prepositions ("de", "da", ...) to the word that follows them.

    :param text: string to tokenize
    :return: a list of tokens
    """
    # Set membership beats a lambda over a list for the repeated test.
    prepositions = frozenset(["de", "da", "do", "em", "e", "dos", "das"])

    result = []
    prep = None

    # Drop the literal "(a)" suffix (e.g. "diretor(a)") and split on
    # non-word characters (raw string: '\W' was an invalid escape);
    # filter(None, ...) removes empty fragments.
    words = filter(None, re_split(r"\W+", text.lower().replace("(a)", "")))

    for word in words:
        if word in prepositions:
            # Hold the preposition; it is prefixed to the next word.
            prep = word
        else:
            word = word.title()
            if prep:
                word = "%s %s" % (prep, word)
            prep = None
            result.append(word.strip())

    return result
Example #3
0
def system_info():
    """Gather viewer-log tail, load average, display, disk and uptime
    information and render the ``system_info`` template with them.
    """
    viewer_log_file = "/tmp/screenly_viewer.log"
    if path.exists(viewer_log_file):
        viewlog = check_output(["tail", "-n", "20", viewer_log_file]).split("\n")
    else:
        viewlog = ["(no viewer log present -- is only the screenly server running?)\n"]

    # Get load average from last 15 minutes and round to two digits.
    loadavg = round(getloadavg()[2], 2)

    try:
        run_tvservice = check_output(["tvservice", "-s"])
        # Raw string: '\|' was an invalid escape in a non-raw literal.
        # NOTE(review): str.strip("state:") strips *characters*, not the
        # prefix -- confirm this is intended for tvservice output.
        display_info = re_split(r"\||,", run_tvservice.strip("state:"))
    except Exception:
        # Was a bare except (also caught SystemExit/KeyboardInterrupt);
        # tvservice is unavailable off Raspberry Pi hardware.
        display_info = False

    # Calculate disk space
    slash = statvfs("/")
    free_space = size(slash.f_bavail * slash.f_frsize)

    # Get uptime
    uptime_in_seconds = uptime()
    system_uptime = timedelta(seconds=uptime_in_seconds)

    return template(
        "system_info",
        viewlog=viewlog,
        loadavg=loadavg,
        free_space=free_space,
        uptime=system_uptime,
        display_info=display_info,
    )
Example #4
0
 def codes(self, _):
     """Report the remaining "difficulty codes" parsed from the game
     engine page; every branch returns a user-facing (Russian) string.

     Python 2 code: relies on ``unicode`` and list-returning ``filter``.
     """
     # Must be logged in to the engine first.
     if self.url == "":
         return u"Сначала надо войти в движок"
     try:
         answer = self.get_dzzzr()
     except Exception as e:
         return unicode(e.message)
     message = None
     try:
         # Locate the "difficulty codes" block in the parsed document.
         message = answer.find("strong",
                               string=re_compile(u"Коды сложности")).parent
     except Exception:
         return u"Коды сложности не найдены"
     if message:
         message = unicode(message).split(u"Коды сложности")[1]
         # One sector per <br>; the trailing fragment is not a sector.
         sectors = re_split("<br/?>", message)[:-1]
         result = []
         for sector in filter(len, sectors):
             try:
                 sector_name, _, codes = sector.partition(u":")
                 _, __, codes = codes.partition(u":")
                 if not codes:
                     continue
                 # Keep only codes not wrapped in <span> (presumably the
                 # not-yet-taken ones -- confirm), numbered from 1.
                 codes = filter(
                     lambda c: u"span" not in c[1],
                     enumerate(codes.strip().split(u", "), start=1))
                 result.append(u"%s (осталось %s): %s" %
                               (sector_name, len(codes), u", ".join(
                                   map(lambda t: u"%s (%s)" %
                                       (t[0], t[1]), codes))))
             except Exception:
                 # Skip malformed sectors silently.
                 pass
         return u"\n".join(result)
     return u"Нет ответа"
Example #5
0
def system_info():
    """Gather viewer-log tail, load average, display, disk and uptime
    information and render the ``system_info`` template with them.
    """
    viewer_log_file = '/tmp/sync_viewer.log'
    if path.exists(viewer_log_file):
        viewlog = check_output(['tail', '-n', '20', viewer_log_file]).split('\n')
    else:
        viewlog = ["(no viewer log present -- is only the sync server running?)\n"]

    # Get load average from last 15 minutes and round to two digits.
    loadavg = round(getloadavg()[2], 2)

    try:
        run_tvservice = check_output(['tvservice', '-s'])
        # Raw string: '\|' was an invalid escape in a non-raw literal.
        # NOTE(review): str.strip('state:') strips *characters*, not the
        # prefix -- confirm this is intended for tvservice output.
        display_info = re_split(r'\||,', run_tvservice.strip('state:'))
    except Exception:
        # Was a bare except (also caught SystemExit/KeyboardInterrupt);
        # tvservice is unavailable off Raspberry Pi hardware.
        display_info = False

    # Calculate disk space
    slash = statvfs("/")
    free_space = size(slash.f_bavail * slash.f_frsize)

    # Get uptime
    uptime_in_seconds = uptime()
    system_uptime = timedelta(seconds=uptime_in_seconds)

    return template('system_info',
                    viewlog=viewlog,
                    loadavg=loadavg,
                    free_space=free_space,
                    uptime=system_uptime,
                    display_info=display_info)
Example #6
0
def system_info():
    """Gather viewer-log tail, load average, display, disk and uptime
    information and render the ``system_info`` template with them.
    """
    viewer_log_file = '/tmp/screenly_viewer.log'
    if path.exists(viewer_log_file):
        viewlog = check_output(['tail', '-n', '20',
                                viewer_log_file]).split('\n')
    else:
        viewlog = [
            "(no viewer log present -- is only the screenly server running?)\n"
        ]

    # Get load average from last 15 minutes and round to two digits.
    loadavg = round(getloadavg()[2], 2)

    try:
        run_tvservice = check_output(['tvservice', '-s'])
        # Raw string: '\|' was an invalid escape in a non-raw literal.
        # NOTE(review): str.strip('state:') strips *characters*, not the
        # prefix -- confirm this is intended for tvservice output.
        display_info = re_split(r'\||,', run_tvservice.strip('state:'))
    except Exception:
        # Was a bare except (also caught SystemExit/KeyboardInterrupt);
        # tvservice is unavailable off Raspberry Pi hardware.
        display_info = False

    # Calculate disk space
    slash = statvfs("/")
    free_space = size(slash.f_bavail * slash.f_frsize)

    # Get uptime
    uptime_in_seconds = uptime()
    system_uptime = timedelta(seconds=uptime_in_seconds)

    return template('system_info',
                    viewlog=viewlog,
                    loadavg=loadavg,
                    free_space=free_space,
                    uptime=system_uptime,
                    display_info=display_info)
Example #7
0
    def list_dir(path, force=False):
        """
        Reads a directory with a shell call to dir. (For Windows)

        :param path: directory path (forward or back slashes accepted)
        :param force: if truthy, include hidden items (``/a`` flag)
        :return: sorted list of entry names (empty for empty output)
        :raises Exception: with the decoded stderr text when dir fails
        """

        # dir expects backslash-separated paths.
        path = sub("/", "\\\\", path)

        # Build the command once instead of duplicating the run() call.
        cmd = ["dir", path, "/b"]
        if force:
            cmd.append("/a")

        dir_listing = run(cmd, shell=True, capture_output=True)

        output = dir_listing.stdout
        err = dir_listing.stderr

        # Check stderr BEFORE the empty-output early return: a failed
        # call has empty stdout and populated stderr, and the original
        # order silently masked such errors as an empty listing.
        if err:
            raise Exception(err.decode("utf-8"))

        if not output:
            return []

        str_output = output.decode("utf-8")
        list_output = re_split("\r\n", str_output)

        return sorted([item for item in list_output if item])
Example #8
0
    def _clean_all(self):
        """Normalise every user-entered field in place and flag the
        record as cleaned.

        :return: None
        """
        # Strip stray whitespace first (fields may not be formatted yet).
        self.email = str(self.email).strip()
        self.company = str(self.company).strip()

        # Force-format the free-text name fields.
        for attr in ["name", "title", "street", "city"]:
            setattr(self, attr, format_name(getattr(self, attr)))

        # Normalise casing.
        self.state_code = self.state_code.upper()
        self.email = self.email.lower()

        # Apply masks
        self.box = "%s-%s" % (self.box[:5], self.box[5:])
        # self.work_phone
        # self.cell_phone

        # Special case: company may hold alternatives separated by "|"
        # or "/"; keep only the last one.
        self.company = format_name(re_split(r"\||\/+", self.company).pop())

        self._clean = True
        return
Example #9
0
def parse_text(text_node):
    """Separate words in text without keeping spaces.

    :param text_node: object with a ``value`` string attribute (may be None)
    :return: list of non-empty words, or None when there is no value
    """
    if text_node.value is None:
        return None
    # Raw string: "[^\w]" relied on an invalid escape sequence.
    words = re_split(r"[^\w]", text_node.value.strip())
    # clean empty strings
    words = [word for word in words if len(word) > 0]
    return words
def google_tts(text, tl='en', ip_addr=None):
    """Speak *text* via Google Translate TTS: download mp3 chunks to a
    temp file, play it with afplay, then delete it.

    This function is adapted from
    https://github.com/hungtruong/Google-Translate-TTS, thanks @hungtruong.

    :param text: text to speak
    :param tl: target language code
    :param ip_addr: optional host override for translate.google.com
    """
    # Process text into chunks of < 100 chars, keeping , and . separators
    # (raw strings: '\,' was an invalid escape in non-raw literals).
    text = text.replace('\n', '')
    text_list = re_split(r'(\,|\.)', text)
    combined_text = []
    for idx, val in enumerate(text_list):
        if idx % 2 == 0:
            # Even indices are text fragments.
            combined_text.append(val)
        else:
            # Odd indices are captured separators; re-attach to the
            # preceding fragment.
            joined_text = ''.join((combined_text.pop(), val))
            if len(joined_text) < 100:
                combined_text.append(joined_text)
            else:
                # Over-long fragment: re-split on spaces into ~80 char pieces.
                subparts = re_split(r'( )', joined_text)
                temp_string = ""
                temp_array = []
                for part in subparts:
                    temp_string = temp_string + part
                    if len(temp_string) > 80:
                        temp_array.append(temp_string)
                        temp_string = ""
                # append final part
                temp_array.append(temp_string)
                combined_text.extend(temp_array)
    # Download chunks and write them to the output file.
    f = NamedTemporaryFile(delete=False)
    host = ip_addr if ip_addr else "translate.google.com"
    headers = {"Host": "translate.google.com",
               "Referer": "http://www.gstatic.com/translate/sound_player2.swf",
               "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.94 Safari/537.36"}
    for idx, val in enumerate(combined_text):
        mp3url = "http://%s/translate_tts?tl=%s&q=%s&total=%s&idx=%s" % (
            host, tl, quote(val), len(combined_text), idx)
        req = Request(mp3url, headers=headers)
        if len(val) > 0:
            try:
                response = urlopen(req)
                f.write(response.read())
            except HTTPError:
                # Best effort: skip chunks the service rejects.
                pass
    f.close()
    # Play the assembled mp3, then clean up the temp file.
    system('afplay {0}'.format(f.name))
    unlink(f.name)
Example #11
0
 def archive(self):
     """Archive the pending factoid to the wiki and notify the requester.

     self.info is split on " or " / "|" separators; [::2] keeps only the
     factoid halves (the capturing group puts separators at odd indices).
     """
     factoids = re_split(r'( or |\|)', self.info)[::2]
     formatted_info = "\n\n{} recorded on {} that {} is:\n* ".format(self.controller.config.get('server', 'nick'), datetime.today().date(), self.waiting) + "\n* ".join(factoids)
     try:
         page_url = self.append_page(formatted_info)
         self.controller.client.reply(self.requester[1], self.requester[0], "Done! {} -- I didn't delete the bot entry, just to be safe; please make sure I copied everything right, then do so.".format(page_url))
     except ResourceNotFoundError:
         self.controller.client.reply(self.requester[1], self.requester[0], "Sorry, that wiki page doesn't exist yet.")
     # Reset the pending state whether or not the page write succeeded.
     self.clear()
Example #12
0
    def _handle_add_delete_community(self, user_message: str) -> None:
        """Handle a user request to add or delete one or more communities.

        Parameters
        ----------
        user_message: str
            the user's message: community links, 1-based numbers (delete
            mode only), or the cancel keyword
        """
        if user_message == 'отмена':
            # Cancel: reset state and acknowledge.
            VkBotStatus.set_state(self._user_id, States.NONE)
            self.send_message(f"Бот больше не {choice(['cлушает', 'внимает'])}... ಠ╭╮ಠ")
        else:
            # Split the message on commas, spaces and newlines.
            communities_links = [i.strip() for i in re_split(', | |\n', user_message)]

            # Keep the trailing path segment of every URL-shaped fragment.
            communities_names = []
            for i in communities_links:
                if search(r'(?:(?:https?|ftp|http?)://)?[\w/\-?=%.]+\.[\w/\-&?=%.]+', i):
                    communities_names.append(str(findall(r'(?:(?:https?|ftp|http?)://)?[\w/\-?=%.]+\.[\w/\-&?=%.]+',
                                                         i)[0]).split("/")[-1])
            # In delete mode, bare 1-based numbers may index the user's
            # own community list instead of links.
            communities_numbers = []
            if VkBotStatus.get_state(self._user_id) == States.DELETE_COMMUNITY and len(communities_names) == 0:
                for i in communities_links:
                    try:
                        if int(i) - 1 >= 0:
                            communities_numbers.append(int(i) - 1)
                    except ValueError:
                        pass
                if len(communities_numbers) != 0:
                    comm_from_me, list_comm_urls = self._functions.show_users_communities(self._vk_session_user,
                                                                                          show_url=True)
                    if comm_from_me:
                        for i in communities_numbers:
                            if i < len(list_comm_urls):
                                communities_names.append(list_comm_urls[i].split("/")[-1])

            if len(communities_names) == 0 and len(communities_numbers) == 0:
                # Nothing parseable: re-prompt with a cancel menu.
                self._create_cancel_menu(message="Ссылки на сообщество" +
                                                 (' или его номера' if VkBotStatus.get_state(
                                                     self._user_id) == States.DELETE_COMMUNITY else '') +
                                                 " не найдено!")
            else:
                try:
                    # Add or delete depending on the current state.
                    result = self._functions.change_users_community(self._vk_session_user,
                                                                    VkBotStatus.get_state(
                                                                        self._user_id) == States.DELETE_COMMUNITY,
                                                                    communities_names)
                    VkBotStatus.set_state(self._user_id, States.NONE)
                    if len(communities_names) == 1:
                        self.send_message(message=f"Сообщество {'удалено' if result else 'сохранено'}!")
                    else:
                        self.send_message(message=f"Сообщества {'удалены' if result else 'сохранены'}!")
                except ApiError:
                    # VK rejected the names: report singular/plural error.
                    if len(communities_names) == 1:
                        bot_message = "Неправильно указана ссылка на сообщество!"
                    else:
                        bot_message = "Неправильно указаны ссылки на сообщества!"
                    self._create_cancel_menu(message=bot_message)
Example #13
0
    def _normalize(self, sep):
        '''Replaces whitespace and punctuation characters with a a configurable 'code
        separator'. Alphabetic characters are converted to upper case.
        '''

        pattern = '[{}{}]'.format(whitespace, punctuation)
        self._normalized_text = sep.join([
            token for token in re_split(pattern, self.text) if token.isalnum()
        ]).upper()
Example #14
0
    def split(self, data):
        """
        Split the reference of the given regex.

        :param str data: The data to work with.
        :rtype: list
        """

        pattern = self.regex
        pieces = re_split(pattern, data)
        return pieces
    def findWords(self, string):
        """Return the list of words found in *string*.

        :param string: filename text to scan
        :raises ValueError: when no word-like token is present
        """
        from re import compile as re_compile

        # Raw string: the original "\b?\w+\b" contained literal backspace
        # characters (\x08), so the split never matched and the method
        # degenerated into string.split(" ").
        pattern = re_compile(r"\b\w+\b")
        matches = pattern.findall(string)
        if matches:
            return matches
        else:
            # Original message broke at raise time: the "%" bound to the
            # second literal only (TypeError: not all arguments converted).
            raise ValueError(
                "%s did not have any recognizable single words"
                " in the filename" % string)
Example #16
0
    def simplify(line):
        '''Return ``(key, original)`` where the key is the right-stripped
        line with all whitespace/punctuation removed and lower-cased, so
        comparisons ignore punctuation, whitespace, and case.'''

        line = line.rstrip()
        separators = "[{}{}]".format(whitespace, punctuation)
        kept = [chunk for chunk in re_split(separators, line) if chunk]
        return ''.join(kept).lower(), line
Example #17
0
 def parse_line(cls, line, create=True, **kwargs):
     """Split *line* on commas/whitespace and resolve each fragment to a
     tag through ``cls.get_by_name``; unresolved fragments are dropped.
     """
     tags = []
     for raw in re_split('[,\s]+', line):
         found = cls.get_by_name(raw.lower(),
                                 create=create,
                                 **kwargs)
         if found:
             tags.append(found)
     return tags
def get_string_wrapped(table: "Texttable", size: int, fixed_col=0) -> str:
    """
    Only works with deco types of VLINES and default. Splits the table to
    multiple lines with a maximum size of 'size', then returns the string
    which would be returned by draw.

    :param table: Table to split (anything exposing ``draw() -> str``;
        annotation is a string so the name need not be importable here)
    :param size: maximum line width in characters
    :param fixed_col: number of leading columns repeated at the start of
        each wrapped line
    :return: the wrapped table as a string
    """
    out: list = []  # was `out: [str]`, a bogus (list-instance) annotation
    line_i = 0
    for line in table.draw().splitlines():
        # On the first line, allocate (hopefully) enough output slots.
        if line_i == 0:  # was `is 0`: identity test against an int literal
            for _ in range(int(ceil(len(line) / size)) * 2):
                out.append("")

        split = re_split(r"[\+\|]", line)
        curr_length = 0
        curr_line = 0

        # Skip the empty strings at the front (a leading | or + splits to "").
        real_fixed_col = fixed_col
        while split[real_fixed_col - fixed_col] == "":  # was `is ""`
            real_fixed_col += 1
        fixed_in_line = split[:real_fixed_col]

        # Distribute the columns of this table line over the output lines.
        for col in split:
            if curr_length + len(col) + 1 < size:
                # The column still fits on the current output line.
                out[curr_line] += col + "|"
            else:
                # Overflow: move to the next output line...
                curr_line += 1

                # ...prefixed with the fixed columns, if requested.
                if fixed_col > 0:
                    fixed_string = "|".join(fixed_in_line) + "|"
                    out[curr_line] += fixed_string
                    curr_length = len(fixed_string)  # we set this here
                else:
                    curr_length = 0  # not set in the if; still must reset

                out[curr_line] += col + "|"

            curr_length += len(col) + 1

        line_i += 1
        # Terminate every output slot for this table line.
        for i in range(len(out)):
            out[i] += "\n"

    # Drop slots that stayed empty (was `is not ""`: literal identity test).
    return "\n".join(list(filter(lambda s: s.replace("\n", "") != "",
                                 out)))
Example #19
0
def convertHHMMtoSec(hhmm):
    """Convert an "H:MM" or "H:MM:SS" time string to seconds.

    :param hhmm: colon-separated time string with 2 or 3 integer fields
    :return: total seconds as a float
    :raises ValueError: when the string does not have 2 or 3 fields
        (ValueError subclasses Exception, so existing handlers still match)
    """
    vals = hhmm.split(":")  # no regex needed for a literal separator
    if len(vals) == 2:
        h, m = vals
        s = 0
    elif len(vals) == 3:
        h, m, s = vals
    else:
        raise ValueError("not well formatted time string")
    return float(timedelta(hours=int(h), minutes=int(m),
                           seconds=int(s)).total_seconds())
Example #20
0
 def get_features(self, source):
     """
     Feature extraction from text. The point where you can customize features.
     source - opened file
     return feature set, iterable object

     Python 2 code: lines are decoded/re-encoded per iteration.
     NOTE(review): assumes self.kind_of_partition, self.est_words,
     self.stopwords and self.ngram are set by the owner -- confirm.
     """
     words = []
     for line in source:
         if self.kind_of_partition == 'word':
             # Split on whitespace/punctuation/digits; keep words passing
             # the estimation-list and stop-word filters.
             words.extend([word
                           for word in re_split('[\s,.:;!?<>+="()%\-0-9d]', line.decode("utf-8").lower().encode("utf-8"))
                           if word and (not self.est_words or word in self.est_words)
                           and word not in self.stopwords])
                           # not is_bad(word.decode("utf-8"))])
         elif self.kind_of_partition == 'ngram':
             # Chop each non-stop word into character chunks of length
             # self.ngram. NOTE(review): the stop-word test is duplicated
             # ("word not in self.stopwords" appears twice).
             for word in re_split('[\s,.:;!?<>+="()%\-]', line.decode("utf-8").lower().encode("utf-8")):
                 if word and word not in self.stopwords and word not in self.stopwords:
                     words.extend(re_findall('.{1,%d}' % self.ngram, word))
     return words
    def _normalize(self, sep_char, text):
        '''Replaces all whitespace and punctuation characters in the supplied
        text with the given separator. Consecutive replaceable characters are
        reduced to one occurance.'''

        # TODO: This method is identical to DetectOperation._simplify
        #       Refactor to use a common function or method

        pattern = "[{}{}]".format(whitespace, punctuation)
        processed = [each for each in re_split(pattern, text) if each]
        return sep_char.join(processed)
Example #22
0
    def deserialize(cls, string):
        """Parse lines of the form ``.../key/value/...`` (slash-separated,
        backslash-escaped slashes allowed) into a ``{key: value}`` dict,
        un-escaping both parts via ``cls._escape_filename``.
        """
        result = {}
        for line in string.split("\n"):
            if not line:
                continue

            # Split on unescaped "/" only (negative lookbehind for "\").
            _, key, value, _ = re_split("(?<=[^\\\])/", line)
            clean_key = cls._escape_filename(key.strip(), True)
            clean_value = cls._escape_filename(value.strip(), True)
            result[clean_key] = clean_value
        return result
Example #23
0
 def parse_line(cls, line, create=True, **kwargs):
     """Turn a comma/whitespace separated line into a list of tags.

     Each fragment is lower-cased and resolved (optionally created)
     through ``cls.get_by_name``; unresolved fragments are dropped.
     """
     fragments = re_split('[,\s]+', line)
     lookups = (
         cls.get_by_name(fragment.lower(), create=create, **kwargs)
         for fragment in fragments
     )
     return [tag for tag in lookups if tag]
Example #24
0
def linebreaksbrpre(str):
  # Apply linebreaksbr everywhere except inside <pre> blocks.
  # NOTE(review): the parameter shadows the builtin `str`; kept only for
  # backward compatibility with keyword callers.
  splited = re_split('</pre>', str)
  # Every chunk except the last contained a "</pre>", so each should also
  # hold a matching "<pre ...>" opener.
  lastStr = splited.pop()
  ret = []
  for splited_str in splited:
    # Text before "<pre" is outside the block: convert its linebreaks.
    pre_splited = re_split('<pre', splited_str)
    ret.append(linebreaksbr(pre_splited[0]))
    ret.append("<pre")
    if (len(pre_splited) > 1):
      # BUG FIX: these appends used split_content outside the guard, so a
      # "</pre>" without "<pre" raised NameError (or reused a stale value).
      split_content = pre_splited[1].split('>', 1)
      ret.append(split_content[0])  # attributes of the <pre ...> tag
      ret.append('>')
      if (len(split_content) > 1):
        # Escape the literal content between <pre> and </pre>.
        ret.append(split_content[1].replace("<", "&lt;").replace('>', '&gt;'))
    else:
      # Malformed input ("</pre>" with no opener): just close the tag.
      ret.append('>')
    ret.append("</pre>")
  ret.append(linebreaksbr(lastStr))
  return "".join(ret)
Example #25
0
def linebreaksbrpre(str):
    # Apply linebreaksbr everywhere except inside <pre> blocks.
    # NOTE(review): the parameter shadows the builtin `str`; kept only for
    # backward compatibility with keyword callers.
    splited = re_split('</pre>', str)
    # Every chunk except the last contained a "</pre>", so each should
    # also hold a matching "<pre ...>" opener.
    lastStr = splited.pop()
    ret = []
    for splited_str in splited:
        # Text before "<pre" is outside the block: convert its linebreaks.
        pre_splited = re_split('<pre', splited_str)
        ret.append(linebreaksbr(pre_splited[0]))
        ret.append("<pre")
        if (len(pre_splited) > 1):
            # BUG FIX: these appends used split_content outside the guard,
            # so a "</pre>" without "<pre" raised NameError (or reused a
            # stale value from the previous iteration).
            split_content = pre_splited[1].split('>', 1)
            ret.append(split_content[0])  # attributes of the <pre ...> tag
            ret.append('>')
            if (len(split_content) > 1):
                # Escape the literal content between <pre> and </pre>.
                ret.append(split_content[1].replace("<", "&lt;").replace(
                    '>', '&gt;'))
        else:
            # Malformed input ("</pre>" with no opener): just close the tag.
            ret.append('>')
        ret.append("</pre>")
    ret.append(linebreaksbr(lastStr))
    return "".join(ret)
Example #26
0
    def post(self, request):
        """
        Gets a POST request with data of a new order from AJAX,
        adds it to the database and calls sending to a pizzeria class.
        Returns JSON with a slug, that jQuery uses to redirecting.
        """
        # Getting information
        orderer_name = request.POST['name']
        orderer_phone_number = request.POST['phone_number']
        orderer_address = request.POST['address']
        pizza = request.POST['pizza']
        drink = request.POST['drink']
        pizzeria = request.POST['pizzeria']
        method_of_payment = request.POST['payment_method']
        # Split the delivery date on any non-word character (raw string:
        # '\W' was an invalid escape in a non-raw literal).
        date_of_delivery_list = re_split(r'\W', request.POST['delivery_date'])
        date_of_delivery = datetime(year=int(date_of_delivery_list[0]),
                                    month=int(date_of_delivery_list[1]),
                                    day=int(date_of_delivery_list[2]),
                                    hour=int(date_of_delivery_list[3]),
                                    minute=int(date_of_delivery_list[4]))

        # Adding the data to the database.
        new_order = PizzaOrder.objects.create(
            orderer_name=orderer_name,
            orderer_phone_number=orderer_phone_number,
            orderer_address=orderer_address,
            pizza=pizza,
            drink=drink,
            pizzeria=pizzeria,
            method_of_payment=method_of_payment,
            date_of_delivery=date_of_delivery)

        # Sending the order to a pizzeria, getting a status of it and
        # adding the status to the data in the database.
        try:
            new_order.status = Pizzerias(
                slug=new_order.slug,
                orderer_name=orderer_name,
                orderer_phone_number=orderer_phone_number,
                orderer_address=orderer_address,
                pizza=pizza,
                drink=drink,
                pizzeria=pizzeria,
                method_of_payment=method_of_payment,
                date_of_delivery=date_of_delivery,
                date_time_of_order=new_order.date_time_of_order
            ).send_order_to_pizzeria()
        except Exception:
            # Was a bare except (also swallowed SystemExit and
            # KeyboardInterrupt); failures are recorded on the order.
            new_order.status = 'There was an error in sending to the pizzeria'
        new_order.save()

        return JsonResponse({'slug': new_order.slug})
Example #27
0
def _format(storage, string, pattern, padding):
    """Append *string* to *storage*, split by *pattern*, re-indenting each
    continuation line relative to *padding* on top of the _LONG base."""
    # The split yields an alternating stream: content, indent, content, ...
    linedata = iter(re_split(pattern, string))
    # First chunk gets the base indentation.
    storage.append(_LONG*' ' + next(linedata))
    # Consume (leading whitespace, line content) pairs.
    for space, data in zip(linedata, linedata):
        # Keep only indentation that exceeds the base padding.
        extra = max(len(space) - padding, 0)
        storage.append((_LONG + extra)*' ' + data)
    # Guarantee a trailing newline on the final stored line.
    if not storage[-1].endswith('\n'):
        storage[-1] += '\n'
Example #28
0
def add_split(line, punctuation=(' ', '-', r'\.')):
    """Split the line on the punctuation and return elements longer than 1 char.

    Param:
        line (unicode)
        punctuation: iterable of regex fragments joined with "|";
            a tuple default replaces the original mutable-list default
            (shared across calls), while remaining call-compatible
    Returns:
        split line (list), or False when no punctuation mark occurs
    """
    # NOTE(review): `p in line` tests the raw fragment, so r'\.' only
    # matches a literal backslash-dot in the line -- confirm intended.
    for p in punctuation:
        if p in line:
            return [
                i for i in re_split('|'.join(punctuation), line) if len(i) > 1
            ]
    return False
Example #29
0
 def read(self, delay=0):
     """
     read one command from SNAP system

     Sleeps *delay* seconds, then reads stdout lines from the child
     process until the "X\\n" terminator, returning the first
     whitespace-separated token of the last line before it.
     """
     sleep(delay)
     stderr.write("___________READ___________\n")
     line = self.process.stdout.readline()
     read = line
     while read != "X\n":
         stderr.write("INFO:" + read)
         line = read
         read = self.process.stdout.readline()
     # stderr.write("HERE:" + str(re_split(r"[\n ]+",line)[0]) + "/end")
     return re_split(r"[\n ]+", line)[0]
Example #30
0
    def parse_line(cls, line, create=True, **kwargs):
        """Parse a comma/whitespace separated tag line, de-duplicating
        fragments (case-insensitively) before resolving them through
        ``cls.get_by_name``; unresolved fragments are dropped.
        """
        tags = []
        seen = set()
        for fragment in re_split('[,\s]+', line):
            fragment = fragment.lower()

            # Resolve each distinct fragment only once.
            if fragment in seen:
                continue
            seen.add(fragment)

            resolved = cls.get_by_name(fragment, create=create, **kwargs)
            if resolved:
                tags.append(resolved)
        return tags
Example #31
0
 def split_at_token(paragraph, end_marks=None, sep_chars=None):
     '''Transform the lines of a paragraph into a collection of sentences.

     The paragraph is split wherever an end mark (default
     SENTENCE_END_MARKS) is directly followed by a separator character
     (default SENTENCE_SEP_CHARS); empty fragments are dropped.
     '''
     end_marks = set(end_marks or SENTENCE_END_MARKS)
     sep_chars = set(sep_chars or SENTENCE_SEP_CHARS)
     # One alternative per (mark, separator) pair, e.g. "[.] |[!] ".
     pattern = end_marks and sep_chars and '|'.join(
         '[{}]{}'.format(mark, sep_char)
         for mark in end_marks for sep_char in sep_chars
     )
     fragments = re_split(pattern, paragraph)
     return [piece for piece in fragments if piece]
Example #32
0
 def archive(self):
     """Archive the pending factoid to the wiki and notify the requester.

     self.info is split on " or " / "|" separators; [::2] keeps only the
     factoid halves (the capturing group puts separators at odd indices).
     """
     factoids = re_split(r'( or |\|)', self.info)[::2]
     formatted_info = "\n\n{} recorded on {} that {} is:\n* ".format(
         self.controller.config.get('server', 'nick'),
         datetime.today().date(), self.waiting) + "\n* ".join(factoids)
     try:
         page_url = self.append_page(formatted_info)
         self.controller.client.reply(
             self.requester[1], self.requester[0],
             "Done! {} -- I didn't delete the bot entry, just to be safe; please make sure I copied everything right, then do so."
             .format(page_url))
     except ResourceNotFoundError:
         self.controller.client.reply(
             self.requester[1], self.requester[0],
             "Sorry, that wiki page doesn't exist yet.")
     # Reset the pending state whether or not the page write succeeded.
     self.clear()
Example #33
0
    def readHitsTBL(self):
        """Look for the next hit in tblout format, package and return"""
        """
We expect line to look like:
NODE_110054_length_1926_cov_24.692627_41_3 -          Ribosomal_S9         PF00380.14   5.9e-48  158.7   0.0   6.7e-48  158.5   0.0   1.0   1   0   0   1   1   1   1 # 1370 # 1756 # 1 # ID=41_3;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None
        """
        # Scan until the first non-comment, non-empty line. At EOF,
        # readline() returns "" whose line[0] raises IndexError -> {}.
        while (1):
            line = self.handle.readline().rstrip()
            try:
                if line[0] != '#' and len(line) != 0:
                    dMatch = re_split( r'\s+', line.rstrip() )
                    # tblout rows have 18 fixed columns + free-text tail.
                    if len(dMatch) < 19:
                        raise FormatError( "Something is wrong with this line:\n%s" % (line) )
                    # Re-join everything past column 18 into one field.
                    refined_match = dMatch[0:18] + [" ".join([str(i) for i in dMatch[18:]])]
                    return HmmerHitTBL(refined_match)
            except IndexError:
                return {}
Example #34
0
    def readHitsDOM(self):
        """Look for the next hit in domtblout format, package and return"""
        """
We expect the line to look like:
NODE_925902_length_6780_cov_18.428171_754_2 -            399 PGK                  PF00162.14   384  2.2e-164  543.7   0.1   1   1  1.3e-167  2.5e-164  543.5   0.1     1   384     9   386     9   386 1.00 # 1767 # 2963 # -1 # ID=754_2;partial=00;start_type=ATG;rbs_motif=AGGA;rbs_spacer=5-10bp
        """
        # Scan until the first non-comment, non-empty line. At EOF,
        # readline() returns "" whose line[0] raises IndexError -> {}.
        while (1):
            line = self.handle.readline().rstrip()
            try:
                if line[0] != '#' and len(line) != 0:
                    dMatch = re_split( r'\s+', line.rstrip() )
                    # domtblout rows have 22 fixed columns + free-text tail.
                    if len(dMatch) < 23:
                        raise FormatError( "Something is wrong with this line:\n%s" % (line) )
                    # Re-join everything past column 22 into one field.
                    refined_match = dMatch[0:22] + [" ".join([str(i) for i in dMatch[22:]])]
                    return HmmerHitDOM(refined_match)
            except IndexError:
                return {}
Example #35
0
 def handle(self):
     """Dispatch one packed request from the socket.

     Wire format: first byte is a signed command word, the rest is
     separator-joined arguments. Replies with the handler's response,
     or packed -1 for an unknown command / -2 for an empty request.
     (Python 2: indexing bytes as request[0] yields a 1-char str.)
     """
     self._logger.debug('handle')
     request = self.request.recv(MAX_REQUEST_SIZE)        
     if request:
         self._logger.debug('request of size %d (%s)'%(len(request), b2a.hexlify(request[:8])))
         args = re_split(self.string_separator, request[1:]) #TODO-3 ??? figure out way to use self.string_separator(should work now)
         command = unpack('>b', request[0])[0]
         method = self.server._command_set.get(command)
         if method:
             response = self.server._command_set[command](*args)
         else:
             self._logger.error('no such command word %d!'%command)
             response = pack('>b', -1)
     else:
         self._logger.error('null packet received!')
         response = pack('>b', -2)
     self.request.send(response)
Example #36
0
    def readHitsDOM(self):
        """Look for the next hit in domtblout format, package and return"""
        """
We expect the line to look like:
NODE_925902_length_6780_cov_18.428171_754_2 -            399 PGK                  PF00162.14   384  2.2e-164  543.7   0.1   1   1  1.3e-167  2.5e-164  543.5   0.1     1   384     9   386     9   386 1.00 # 1767 # 2963 # -1 # ID=754_2;partial=00;start_type=ATG;rbs_motif=AGGA;rbs_spacer=5-10bp
        """
        # Scan until the first non-comment, non-empty line. At EOF,
        # readline() returns "" whose line[0] raises IndexError -> {}.
        while (1):
            line = self.handle.readline().rstrip()
            try:
                if line[0] != '#' and len(line) != 0:
                    dMatch = re_split( r'\s+', line.rstrip() )
                    # domtblout rows have 22 fixed columns + free-text tail.
                    if len(dMatch) < 23:
                        raise FormatError( "Something is wrong with this line:\n%s" % (line) )
                    # Re-join everything past column 22 into one field.
                    refined_match = dMatch[0:22] + [" ".join([str(i) for i in dMatch[22:]])]
                    return HmmerHitDOM(refined_match)
            except IndexError:
                return {}
Example #37
0
def _evaluated_value(value):
    if value == '' or not value:
        return ''
    if not isinstance(value, str):
        return value
    isList = compile(r"^\[.+\]$")
    # dirty list case
    if findall(isList, value):
        result = re_split(r', ?', value.strip(']['))
        return [expandvars(ele) for ele in result]
    try:
        # all cases of bool, int or others.
        if isinstance(eval(value), bool) or isinstance(eval(value), Number):
            return eval(value)
    except (NameError, SyntaxError):
        pass
    return expandvars(value)
Example #38
0
def checksum(sentence):
    """Calculate the checksum for a sentence (e.g. NMEA string).

    :param sentence: text of the form "<data>*<stated-checksum>[\\n]";
        exactly one '*' is expected
    :return: dict with key 'checksum' holding the XOR of all data
        characters as uppercase hex (no '0x' prefix)
    """
    result = {'checksum': None}
    # Remove a single trailing newline, if present (str method replaces
    # the original re_search on the non-raw pattern "\n$").
    if sentence.endswith("\n"):
        sentence = sentence[:-1]

    # Raw string: '\*' was an invalid escape in a non-raw literal.
    nmeadata, cksum = re_split(r'\*', sentence)

    # XOR every character of the data part.
    calc_cksum = 0
    for ch in nmeadata:
        calc_cksum ^= ord(ch)

    result['checksum'] = hex(calc_cksum)[2:].upper()
    return result
def get_filename_label_dict(filenames):
    """Map each filename to its activity label.

    Filenames are expected in the format ``subjectN_L.txt`` where N is
    the subject number and L is the action label; splitting on
    FILENAME_REGEX (underscores and periods) puts the label at index 1.

    The original placed this explanation in a bare triple-quoted string
    inside the loop, which was re-evaluated on every iteration; it now
    lives here as the function docstring.
    """
    filename_label_dict = {}

    for filename in filenames:
        filename_parts = re_split(FILENAME_REGEX, filename)
        # filename_parts[1] is the label L from subjectN_L.txt.
        filename_label_dict[filename] = filename_parts[1]

    return filename_label_dict
Example #40
0
    def readHitsTBL(self):
        """Look for the next hit in tblout format, package and return"""
        """
We expect line to look like:
NODE_110054_length_1926_cov_24.692627_41_3 -          Ribosomal_S9         PF00380.14   5.9e-48  158.7   0.0   6.7e-48  158.5   0.0   1.0   1   0   0   1   1   1   1 # 1370 # 1756 # 1 # ID=41_3;partial=00;start_type=ATG;rbs_motif=None;rbs_spacer=None
        """
        # Read lines until the first non-comment hit line is found.
        while (1):
            line = self.handle.readline().rstrip()
            try:
                # NOTE: on an empty line, line[0] raises IndexError before
                # the len() check runs.  At EOF readline() returns '' and
                # that IndexError is the loop's ONLY exit path (returns {}),
                # so the dead-looking len(line) != 0 test must not be
                # reordered to the front.
                if line[0] != '#' and len(line) != 0:
                    dMatch = re_split( r'\s+', line.rstrip() )
                    # tblout rows need at least 19 whitespace-separated
                    # fields: 18 fixed columns plus a description that may
                    # itself contain spaces.
                    if len(dMatch) < 19:
                        raise FormatError( "Something is wrong with this line:\n%s" % (line) )
                    # Re-join everything past column 18 back into a single
                    # free-text description field.
                    refined_match = dMatch[0:18] + [" ".join([str(i) for i in dMatch[18:]])]
                    return HmmerHitTBL(refined_match)
            except IndexError:
                # Empty line (or EOF): no more hits.
                return {}
Example #41
0
 def post(self):
     # Fuzzy user search handler: the first key/value pair of the posted
     # JSON is used as search column and keyword for a LIKE query
     # against the radcheck table; results are written back as JSON.
     # NOTE(review): Python 2 code (print statement, dict.keys()[0]
     # indexing, Tornado gen.Task style) -- do not run under Python 3.
     origin_json = jsonLoads(self.request.body)
     sql_table = 'id, username'
     # Fall back to searching by username when the posted key is empty.
     sql_key = origin_json.keys()[0] or 'username'
     # Wrap the term in SQL LIKE wildcards.
     sql_keyword = '%' + origin_json.values()[0] + '%'
     print sql_keyword
     sql_dict = dict(tables = sql_table, 
                     key = sql_key, 
                     keyword = sql_keyword)
     # Query template comes from the handler's form/SQL registry --
     # presumably %-interpolated, not parameterized; verify injection
     # safety upstream.
     sql = self.forms['radcheck']['select_user']
     sql_context = sql % sql_dict
     origin_data = yield Task(self.db.select, sql_context)
     # Zip each result row with the selected column names to build a
     # list of {column: value} dicts.
     table_name = re_split(', ', sql_table)
     user_list = map(lambda x: dict(map(lambda z,d: (d,z), 
                              x,table_name)), origin_data)
     the_data = { "userlist": user_list,
                  "tables_name": table_name}
     self.write(convJson(the_data))
Example #42
0
    def parse_line(cls, line, create=True, **kwargs):
        """Split *line* on commas/whitespace and resolve each token to a tag.

        Tokens are lower-cased and deduplicated in first-seen order;
        tokens that do not resolve via ``cls.get_by_name`` are dropped.
        Returns the list of resolved tags.
        """
        resolved = []
        processed = set()
        for raw_token in re_split('[,\s]+', line):
            token = raw_token.lower()

            # Skip anything we have already handled on this line.
            if token in processed:
                continue
            processed.add(token)

            candidate = cls.get_by_name(
                token,
                create=create,
                **kwargs
            )
            if candidate:
                resolved.append(candidate)
        return resolved
Example #43
0
 def episode_parser(value):
     """Parse an episode-number string (e.g. '2x01-03') into numerals.

     *value* is split on letters; each letter-free chunk may contain
     dash-separated ranges which are expanded inclusively.  Returns a
     single numeral, a {None: first, 'episodeList': [...]} dict when
     several numerals are found, or None when nothing parses.
     """
     values = re_split('[a-zA-Z]', value)
     values = [x for x in values if x]
     ret = []
     for letters_elt in values:
         dashed_values = letters_elt.split('-')
         dashed_values = [x for x in dashed_values if x]
         if len(dashed_values) > 1:
             # Expand each CONSECUTIVE pair inclusively.  (Bug fix: the
             # original always re-expanded dashed_values[0]..[1], so a
             # multi-dash value like '1-2-4' duplicated the first range
             # once per extra dash instead of walking the pairs.)
             for pair_index in range(0, len(dashed_values) - 1):
                 start_dash_ep = parse_numeral(dashed_values[pair_index])
                 end_dash_ep = parse_numeral(dashed_values[pair_index + 1])
                 for dash_ep in range(start_dash_ep, end_dash_ep + 1):
                     ret.append(dash_ep)
         else:
             ret.append(parse_numeral(letters_elt))
     if len(ret) > 1:
         return {None: ret[0], 'episodeList': ret}  # TODO: Should support seasonList also
     elif len(ret) > 0:
         return ret[0]
     else:
         return None
Example #44
0
 def episode_parser(value):
     """Parse an episode-number string (e.g. '2x01-03') into numerals.

     *value* is split on letters; each letter-free chunk may contain
     dash-separated ranges which are expanded inclusively.  Returns a
     single numeral, a {None: first, 'episodeList': [...]} dict when
     several numerals are found, or None when nothing parses.
     """
     values = re_split('[a-zA-Z]', value)
     values = [x for x in values if x]
     ret = []
     for letters_elt in values:
         dashed_values = letters_elt.split('-')
         dashed_values = [x for x in dashed_values if x]
         if len(dashed_values) > 1:
             # Expand each CONSECUTIVE pair inclusively.  (Bug fix: the
             # original always re-expanded dashed_values[0]..[1], so a
             # multi-dash value like '1-2-4' duplicated the first range
             # once per extra dash instead of walking the pairs.)
             for pair_index in range(0, len(dashed_values) - 1):
                 start_dash_ep = parse_numeral(dashed_values[pair_index])
                 end_dash_ep = parse_numeral(dashed_values[pair_index + 1])
                 for dash_ep in range(start_dash_ep, end_dash_ep + 1):
                     ret.append(dash_ep)
         else:
             ret.append(parse_numeral(letters_elt))
     if len(ret) > 1:
         return {None: ret[0], 'episodeList': ret}  # TODO: Should support seasonList also
     elif len(ret) > 0:
         return ret[0]
     else:
         return None
Example #45
0
 def install_calico(self):
     # Install the Calico CNI manifests into the EKS cluster, splitting
     # each multi-document YAML into CRDs (applied first) and everything
     # else (applied after, via an explicit dependency).
     # This produces an obnoxious diff on every subsequent run
     # Using a helm chart does not, so we should switch to that
     # However, we need to figure out how to get the helm chart
     # accessible by the CDK lambda first. Not clear how to give
     # s3 perms to it programmatically, and while ECR might be
     # an option it also doesn't seem like there's a way to push
     # the chart with existing api calls.
     # Probably need to do some custom lambda thing.
     # NOTE(review): `manifests` is a module-level sequence; each entry
     # appears to be a (name, url) pair -- confirm against its definition.
     for manifest in manifests:
         filename = f"{manifest[0]}.yaml"
         # Prefer a local copy of the manifest; fall back to fetching it.
         if isfile(filename):
             with open(filename) as f:
                 manifest_text = f.read()
         else:
             manifest_text = requests_get(manifest[1]).text
         # Split the multi-document YAML on '---' separator lines.
         loaded_manifests = [
             yaml_safe_load(i)
             for i in re_split("^---$", manifest_text, flags=MULTILINE) if i
         ]
         # NOTE(review): the construct IDs "calico-crds"/"calico" are
         # fixed inside the loop -- a second iteration would collide on
         # duplicate CDK construct IDs; presumably `manifests` holds a
         # single entry.  Verify before adding more.
         crds = eks.KubernetesManifest(
             self.scope,
             "calico-crds",
             cluster=self.eks_cluster,
             manifest=[
                 crd for crd in loaded_manifests
                 if crd["kind"] == "CustomResourceDefinition"
             ],
         )
         non_crds = eks.KubernetesManifest(
             self.scope,
             "calico",
             cluster=self.eks_cluster,
             manifest=[
                 notcrd for notcrd in loaded_manifests
                 if notcrd["kind"] != "CustomResourceDefinition"
             ],
         )
         # Ensure CRDs exist before the resources that use them.
         non_crds.node.add_dependency(crds)
Example #46
0
def CInstructionHandler(Instruction):
    """Translate a Hack C-instruction ('dest=comp;jump') into its 16-bit
    binary string.

    A missing dest or jump field is encoded as "000".  Lookups go
    through the module-level DestDict / CompAndJumpDict tables.
    """
    fields = re_split("[=;]", Instruction)
    if len(fields) == 3:
        # Full form: dest=comp;jump
        comp_bits = CompAndJumpDict[fields[1]]
        dest_bits = DestDict[fields[0]]
        jump_bits = CompAndJumpDict[fields[2]]
        return "111" + comp_bits + dest_bits + jump_bits
    if "=" not in Instruction:
        # comp;jump -- no destination field.
        comp_bits = CompAndJumpDict[fields[0]]
        jump_bits = CompAndJumpDict[fields[1]]
        return "111" + comp_bits + "000" + jump_bits
    if ";" not in Instruction:
        # dest=comp -- no jump field.
        comp_bits = CompAndJumpDict[fields[1]]
        dest_bits = DestDict[fields[0]]
        return "111" + comp_bits + dest_bits + "000"
def chart_strings(number_of_tables, interval, ranges, sheet='Sheet1'):
    """This function takes some information about multiple excel tables 
    and generates references for multiple columns in those tables, 
    for the purpose of generating line graphs from the data."""
    from string import ascii_uppercase
    from re import match, split as re_split

    def offset(index, table_index):
        # This adjusts the index of a column, based on the table_index.
        # It depends on re.match.
        # '$'-prefixed indices are absolute: strip the '$' and do not shift.
        if index[0] == '$':
            return index.replace('$', '')
        # Alphabetic column letters are shifted right by table_index
        # columns; to_alpha/from_alpha are project helpers converting
        # between column letters and numbers (defined elsewhere).
        if match(r'[A-Z]+', index):
            return to_alpha(from_alpha(index) + table_index)
        else:
            # Row numbers (and anything else) pass through unchanged.
            return index

    output = []
    prefix = "='{}'!".format(sheet)

    # To generate the list of range references, each name is followed by the prefix and the offset range indices.
    for table_index in range(0, number_of_tables * interval, interval):
        table_references = []
        for name, cells in ranges:
            # Split each cell like 'A1' into ['A', '1'] (dropping the
            # trailing empty split piece) and offset every component.
            # `flatten` is a project helper (defined elsewhere).
            reference = [
                offset(index, table_index) for index in flatten([
                    re_split(r'(\d+)', index)[:-1]
                    for index in cells.split(':')
                ])
            ]
            # NOTE(review): the range branch formats six placeholders and
            # so presumably expects len(reference) == 4 (col,row,col,row);
            # a 3-element reference would raise IndexError -- confirm
            # `ranges` always supplies full 'A1:B2'-style cell pairs.
            if len(reference) > 2:
                table_references.append('{}{}${}${}:${}${}'.format(
                    name, prefix, *reference))
            else:
                table_references.append('{}{}${}${}'.format(
                    name, prefix, *reference))
        output.append(tuple(table_references))
    return tuple(output)
def _get_pubs_per_year_dictionary(pubyearslist):
    '''
    Return a dict mapping year -> number of publications first published
    in that year.

    (Docstring fix: the original documented a nonexistent ``person_id``
    parameter and a list return shape.)

    @param pubyearslist: iterable of (ignored, years) pairs where
        years['year_fields'] holds sequences whose first element is a
        date string such as '1941-02-01'
    @return {year: publication_count}
    '''
    yearsdict = {}
    for _, years in pubyearslist:
        year_list = []
        for date in years['year_fields']:
            try:
                # year_pattern (module-level regex) isolates the year as
                # the second split piece; non-matching dates are skipped.
                year_list.append(int(re_split(year_pattern, date[0])[1]))
            except IndexError:
                continue

        if year_list:
            # A publication counts toward its earliest associated year.
            min_year = min(year_list)
            yearsdict[min_year] = yearsdict.get(min_year, 0) + 1

    return yearsdict
Example #49
0
 def GetTimestamp(self, time):
     """Convert a '<M>m<S>s' duration string into total seconds."""
     # Splitting on the literal 'm'/'s' markers yields exactly
     # (minutes, seconds, trailing empty string); the strict 3-way
     # unpack rejects malformed input.
     minutes_str, seconds_str, _trailing = re_split('[ms]', time)
     return int(minutes_str) * 60 + int(seconds_str)
Example #50
0
def get_word_vector(line):
    """Lower-case *line*, split it on whitespace/punctuation/digits and
    return the surviving tokens, skipping empties and @-mentions.
    """
    # NOTE(review): the character class contains a literal 'd' (and '+'),
    # most likely a mangled '\d+' -- words containing the letter 'd' get
    # split in two.  Preserved byte-for-byte to keep behavior identical.
    return [
        token
        for token in re_split('[\s,.:;!?<>d+="()%\-0-9]', line.lower())
        if token != '' and token[0] != '@'
    ]
Example #51
0
def shuffle_corpus(corpus, delimiter_list, mode, end_sign):
    """Randomize words in the text, transforming the text
    into the meaningless form.

    Two different shuffling principles are implemented:
    shuffling on the sentence level and
    shuffling on the whole text level.

    In the text-level shuffling, the original text is
    randomized by shuffling the words and punctuation
    marks over the whole text.

    Parameters
    ----------
    corpus : file
        original file on which shuffling
        procedure will be applied
    delimiter_list : list
        list of delimiters
    mode : sentence or text
        shuffling principles
    end_sign : char
        character that will mark end of a sentence
    """
    from re import split as re_split
    from random import shuffle
    from random import randint

    with open(corpus, "r", encoding="utf-8") as f:
        f_r = f.read()

    punct_split_dict = {d: str(d) + " " for d in delimiter_list}

    for k, v in punct_split_dict.iteritems():
        f_r = f_r.replace(k, v)

    corpus_list = f_r.split()
    corpus_list = [word for word in corpus_list if
                   word != "" and word not in delimiter_list]

    if mode == "sentence":
        text = " ".join(corpus_list)
        sentences = re_split('\.|\?|\!|\. |\? |\! ', text)

        sentences_split = [i.split() for i in sentences]
        for i in sentences_split:
            shuffle(i)

        shuffled_list = [x.strip() for x in
                         [" ".join(x) + end_sign for x in sentences_split]]

        with open(corpus.rsplit(".", 1)[0] + "_sentence_shuffled." +
                          corpus.rsplit(".", 1)[1], "w",
                  encoding="utf-8") as write_f:
            write_f.write(" ".join(shuffled_list))

    elif mode == "text":
        shuffled_list = corpus_list
        shuffle(shuffled_list)

        if delimiter_list:
            delimiter_count = 0
            for i, word in enumerate(shuffled_list):
                for d in delimiter_list:
                    if word[-len(d):] == d:
                        delimiter_count += 1
                        shuffled_list[i] = word[:-len(d)]
            while delimiter_count:
                rand = randint(0, len(shuffled_list) - 1)
                if shuffled_list[rand][-len(end_sign):] != end_sign:
                    shuffled_list[rand] += end_sign
                    delimiter_count -= 1

        with open(corpus.rsplit(".", 1)[0] + "_text_shuffled." +
                          corpus.rsplit(".", 1)[1], "w",
                  encoding="utf-8") as write_f:
            write_f.write(" ".join(shuffled_list))
Example #52
0
def parse_time(timestamp):
    """Parse a digit-separated timestamp string into a datetime.

    Any non-digit character acts as a field separator; a literal
    '+00:00' UTC offset suffix is stripped first.  Falsy input
    returns None.
    """
    if not timestamp:
        return None
    cleaned = timestamp.replace("+00:00", "")
    fields = [int(piece) for piece in re_split(r"[^\d]", cleaned)]
    return datetime.datetime(*fields)
Example #53
0
def get_affiliations_for_author(pid):
    # Return (affiliations, year) for the given person id: the set of
    # affiliations attached to that author's most recent paper(s), and
    # the year those papers were written.  Returns ([], None) when no
    # affiliation can be resolved.
    # NOTE(review): Python 2 code (dict.iteritems below); relies on the
    # Invenio helpers prs/canonical_name/format_record/deserialize
    # defined elsewhere in the project.
    # get all papers for the specified author
    # (different query for the search engine of INSPIRE and Atlantis db)
    names_dict = _get_person_names_dicts_bai(pid)
    cname = canonical_name(pid)
    recs = prs(0, 'author:%s' % str(cname))

    # get mapping of: affiliation -> papers
    names_list = names_dict['db_names_dict'].keys()
    affiliation_to_recs = _get_institute_pubs_dict(recs, names_list)

    # get mapping of: paper -> year it was written
    # Records are serialized back-to-back with a sentinel delimiter.
    a = ''.join(format_record(r, 'WAPDAT') for r in recs)
    rec_to_years = [deserialize(p) for p in a.strip().split('!---THEDELIMITER---!') if p]

    # inverse the above mapping to: year -> papers that where written that year
    year_to_recs = dict()
    for rec, year_fields in rec_to_years:
        years = list()
        for date in year_fields['year_fields']:
            try:
                # 'date' may be in a form like: '1941-02-01'
                # so we extract the year from the date with regex
                years.append(int(re_split(year_pattern, date[0])[1]))
            except IndexError:
                continue
        if years:
        # from the set of years that the paper is
        # associated with we keep the oldest year
            try:
                year_to_recs[min(years)].add(rec)
            except KeyError:
                year_to_recs[min(years)] = set([rec])

    # we are interested in the affiliation of the paper/s
    # which were written most recently
    most_recent_papers = list()
    for year in sorted(year_to_recs.keys(), reverse=True):
        most_recent_papers.append((year_to_recs[year], year))

    # inverse the mapping of: affiliation -> papers
    # to: paper -> affiliations
    rec_to_aff = dict()
    for affiliation, papers in affiliation_to_recs.iteritems():
        for rec in papers:
            rec_to_aff.setdefault(rec, []).append(affiliation)

    # get the affiliations of the most recent papers
    # Walk years newest-first and stop at the first year that yields at
    # least one affiliation.
    affiliations = set()
    aff_year = None
    for papers, year in most_recent_papers:
        for rec in papers:
            try:
                for aff in rec_to_aff[rec]:
                    affiliations.add(aff)
            except KeyError:
                pass
        if affiliations:
            aff_year = year
            break

    return list(affiliations), aff_year
Example #54
0
def parse_time(timestamp):
    """Convert a timestamp string into a datetime, or None if falsy.

    Runs of non-digit characters separate the fields; a literal
    '+00:00' suffix is removed before splitting.
    """
    if timestamp:
        pieces = re_split(r'[^\d]', timestamp.replace('+00:00', ''))
        return datetime.datetime(*(int(p) for p in pieces))
    return None
Example #55
0
    def modify_message(self, tab, msg_as_string):
        """Apply one tab's parameter-handling rules to *msg_as_string*.

        Finds matches of the tab's target expression, optionally extracts
        dynamic replacement values from a cached/single/macro response,
        and substitutes the configured replacement at the user-selected
        match indices.  Returns the modified message; on any invalid
        configuration or regex error the original message is returned
        unchanged.

        NOTE(review): Jython/Python 2 code (dict.viewkeys below).
        """
        ph_matchnum_txt = tab.param_handl_txtfield_match_indices.getText()

        ph_target_exp         = tab.get_exp_pane_expression(tab.param_handl_exp_pane_target        )
        ph_extract_static_exp = tab.get_exp_pane_expression(tab.param_handl_exp_pane_extract_static)
        ph_extract_single_exp = tab.get_exp_pane_expression(tab.param_handl_exp_pane_extract_single)
        ph_extract_macro_exp  = tab.get_exp_pane_expression(tab.param_handl_exp_pane_extract_macro )
        ph_extract_cached_exp = tab.get_exp_pane_expression(tab.param_handl_exp_pane_extract_cached)

        if not ph_target_exp:
            self.logger.warning(
                'No match expression specified! Skipping tab "{}".'.format(
                    tab.namepane_txtfield.getText()
                )
            )
            return msg_as_string

        exc_invalid_regex = 'Skipping tab "{}" due to error in expression {{}}: {{}}'.format(
            tab.namepane_txtfield.getText()
        )

        try:
            match_exp = re_compile(ph_target_exp)
        except re_error as e:
            self.logger.error(exc_invalid_regex.format(ph_target_exp, e))
            return msg_as_string

        # The following code does not remove support for groups,
        # as the original expression will be used for actual replacements.
        # We simply need an expression without capturing groups to feed into re.findall(),
        # which enables the logic for granular control over which match indices to target.

        # Removing named groups to normalize capturing groups.
        findall_exp = re_sub('\?P<.+?>', '', ph_target_exp)
        # Removing capturing groups to search for full matches only.
        findall_exp = re_sub(r'(?<!\\)\(([^?]*?)(?<!\\)\)', '\g<1>', findall_exp)
        findall_exp = re_compile(findall_exp)
        self.logger.debug('findall_exp: {}'.format(findall_exp.pattern))

        all_matches = re_findall(findall_exp, msg_as_string)
        self.logger.debug('all_matches: {}'.format(all_matches))

        match_count = len(all_matches)
        if not match_count:
            self.logger.warning(
                'Skipping tab "{}" because this expression found no matches: {}'.format(
                    tab.namepane_txtfield.getText(),
                    ph_target_exp
                )
            )
            return msg_as_string

        matches     = list()
        dyn_values  = ''
        replace_exp = ph_extract_static_exp

        # Dynamic mode: pull the replacement value out of another
        # response (cached / freshly issued / final macro response).
        if tab.param_handl_dynamic_chkbox.isSelected():
            find_exp, target_txt = '', ''
            selected_item = tab.param_handl_combo_extract.getSelectedItem()

            if selected_item == tab.PARAM_HANDL_COMBO_EXTRACT_CACHED:
                find_exp, target_txt = ph_extract_cached_exp, tab.param_handl_cached_resp_viewer.getMessage()
                target_txt = self.helpers.bytesToString(target_txt)

            elif selected_item == tab.PARAM_HANDL_COMBO_EXTRACT_SINGLE:
                self.issue_request(tab)
                find_exp, target_txt = ph_extract_single_exp, self.helpers.bytesToString(tab.response)

            elif selected_item == tab.PARAM_HANDL_COMBO_EXTRACT_MACRO:
                find_exp, target_txt = ph_extract_macro_exp, self.final_macro_resp

            if not find_exp:
                self.logger.warning(
                    'No dynamic value extraction expression specified! Skipping tab "{}".'.format(
                        tab.namepane_txtfield.getText()
                    )
                )
                return msg_as_string

            try:
                # Making a list to enable multiple iterations.
                matches = list(re_finditer(find_exp, target_txt))
            except re_error as e:
                self.logger.error(exc_invalid_regex.format(ph_extract_macro_exp, e))
                return msg_as_string

            if not matches:
                self.logger.warning('Skipping tab "{}" because this expression found no matches: {}'.format(
                    tab.namepane_txtfield.getText(),
                    find_exp
                ))
                return msg_as_string

            # Merge every match's named groups, rejecting the tab if two
            # matches define the same group name (ambiguous extraction).
            groups = {}
            groups_keys = groups.viewkeys()
            for match in matches:
                gd = match.groupdict()
                # The given expression should have unique group matches.
                for k in gd.keys():
                    if k in groups_keys:
                        self.logger.warning('Skipping tab "{}" because this expression found ambiguous matches: {}'.format(
                            tab.namepane_txtfield.getText(),
                            find_exp
                        ))
                        return msg_as_string
                groups.update(gd)

            # Remove '$' not preceded by '\'
            exp = re_sub(r'(?<!\\)\$', '', ph_target_exp)
            flags = re_match('\(\?[Limuxs]{1,6}\)', ph_target_exp)
            if flags is not None and 'x' in flags.group(0):
                exp += '\n'

            # Append the extracted values as named groups so the final
            # substitution can reference them.
            groups_exp = ''.join(['(?P<{}>{})'.format(group_name, group_match) for group_name, group_match in groups.items()])
            dyn_values = ''.join(groups.values())

            # No need for another try/except around this re.compile(),
            # as ph_target_exp was already checked when compiling match_exp earlier.
            # match_exp = re_compile(exp + groups_exp + end)
            match_exp = re_compile(exp + groups_exp)
            self.logger.debug('match_exp adjusted to:\n{}'.format(match_exp.pattern))

        # Parse the user's match-index field: comma-separated ints and
        # Python-style 'start:end' slices, with negatives counted from
        # the end.  Invalid entries are logged and ignored.
        subsets = ph_matchnum_txt.replace(' ', '').split(',')
        match_indices = []
        for subset in subsets:
            try:
                if ':' in subset:
                    sliceindex = subset.index(':')
                    start = int(subset[:sliceindex   ])
                    end   = int(subset[ sliceindex+1:])
                    if start < 0:
                        start = match_count + start
                    if end < 0:
                        end = match_count + end
                    for match_index in range(start, end):
                        match_indices.append(match_index)
                else:
                    match_index = int(subset)
                    if match_index < 0:
                        match_index = match_count + match_index
                    match_indices.append(match_index)
            except ValueError as e:
                self.logger.error(
                    'Ignoring invalid match index or slice on tab "{}" due to {}'.format(
                        tab.namepane_txtfield.getText(),
                        e
                    )
                )
                continue

        match_indices = set(sorted([m for m in match_indices if m < match_count]))
        self.logger.debug('match_indices: {}'.format(match_indices))

        # Using findall_exp to avoid including capture groups in the result.
        message_parts = re_split(findall_exp, msg_as_string)
        self.logger.debug('message_parts: {}'.format(message_parts))

        # The above strategy to use re.split() in order to enable the usage of match_indices
        # ends up breaking non-capturing groups. At this point, however, we can safely remove
        # all non-capturing groups and everything will be peachy.
        ncg_exp = re_compile('\(\?[^P].+?\)')
        if re_search(ncg_exp, match_exp.pattern) is not None:
            match_exp = re_compile(ncg_exp.sub('', match_exp.pattern))
            # NOTE(review): `flags` is only assigned inside the dynamic
            # branch above; reaching this line without dynamic mode
            # enabled would raise NameError -- confirm and guard.
            if flags is not None:
                match_exp = re_compile(flags.group(0) + match_exp.pattern)
            self.logger.debug('match_exp adjusted to:\n{}'.format(match_exp.pattern))

        # Reassemble the message: at each selected index substitute via
        # match_exp; elsewhere re-insert the original match verbatim.
        modified_message  = ''
        remaining_indices = list(match_indices)
        for part_index, message_part in enumerate(message_parts):
            if remaining_indices and part_index == remaining_indices[0]:
                try:
                    final_value = match_exp.sub(replace_exp, all_matches[part_index] + dyn_values)
                except (re_error, IndexError) as e:
                    self.logger.error(exc_invalid_regex.format(match_exp.pattern + ' or expression ' + replace_exp, e))
                    return msg_as_string
                self.logger.debug('Found:\n{}\nreplaced using:\n{}\nin string:\n{}'.format(
                    match_exp.pattern,
                    replace_exp,
                    all_matches[part_index] + dyn_values
                ))
                final_value = message_part + final_value
                modified_message += final_value
                remaining_indices.pop(0)
            elif part_index < match_count:
                modified_message += message_part + all_matches[part_index]
            else:
                modified_message += message_part