Python regexp_compile 예제들, re.regexp_compile Python 예제들

예제 #1

0

파일 보기

파일: check_process_list.py 프로젝트: konstantinbest/igmonplugins

class Check:
    """A single comparison of one process variable against a value.

    A check is made of a variable name, an operator symbol and a value.
    An optional "/min" divider asks for the variable to be scaled with a
    one minute ``timedelta`` before it is compared.
    """

    # Maps every supported operator symbol to a factory.  Calling the
    # factory with the right-hand value returns a one-argument callable
    # that is applied to the left-hand value.
    operators = {
        '~=': lambda b: regexp_compile(b).match,
        '==': lambda b: lambda a: a == b,
        '!=': lambda b: lambda a: a != b,
        '<=': lambda b: lambda a: a <= b,
        '>=': lambda b: lambda a: a >= b,
        '<': lambda b: lambda a: a < b,
        '>': lambda b: lambda a: a > b,
    }

    def __init__(self, var, symbol, value, divider=None):
        """Build the check and its comparison callable

        Args:
            var: name of the variable to look up on the process
            symbol: one of the keys of ``operators``
            value: right-hand side of the comparison
            divider: optional unit the variable is divided by; only
                'min' is accepted

        Raises:
            KeyError: if ``symbol`` is not a key of ``operators``
            NotImplementedError: if ``divider`` is set to anything
                other than 'min'
        """
        self.var = var
        self.symbol = symbol
        self.value = value
        self.executor = self.operators[symbol](value)
        if divider:
            if divider != 'min':
                # NotImplemented is a constant, not an exception class;
                # calling it would fail with an unrelated TypeError.
                raise NotImplementedError('Only "/min" is supported')
            self.divider = timedelta(minutes=1)
        else:
            self.divider = None

    def __str__(self):
        """Return the check as "<var>[ / <divider>] <symbol> <value>" """
        key = self.var
        if self.divider:
            key += ' / {}'.format(self.divider)

        return '{} {} {}'.format(key, self.symbol, self.value)

    def __call__(self, process):
        """Apply the check to a process and return the operator's result

        With a divider the value comes from
        ``process.get_scaled_value(var, divider)``, and a falsy scaled
        value fails the check.  Without one it is read as
        ``process[var]``.
        """
        if self.divider:
            value = process.get_scaled_value(self.var, self.divider)
            if not value:
                return False
        else:
            value = process[self.var]

        return self.executor(value)

    @classmethod
    def parse(cls, pair):
        """Parse "<var>[/<divider>] <symbol> <value>" into a Check

        Raises:
            ValueError: if ``pair`` contains none of the operator symbols
        """
        # Longest symbols are tried first so that "<=" is not mistaken
        # for "<".
        for symbol in sorted(cls.operators.keys(), key=len, reverse=True):
            if symbol in pair:
                index = pair.index(symbol)
                # The part before the operator is "<var>" or
                # "<var>/<divider>".
                left_split = pair[:index].split('/', 1)
                var = left_split[0].strip()
                if len(left_split) > 1:
                    divider = left_split[1].strip()
                else:
                    divider = None

                value = cast(pair[(index + len(symbol)):].strip())

                return cls(var, symbol, value, divider)

        raise ValueError('Cannot parse {}'.format(pair))

예제 #2

0

파일 보기

파일: extractor.py 프로젝트: NAMD/pypln.backend

import shlex

from HTMLParser import HTMLParser
from tempfile import NamedTemporaryFile
from os import unlink
from subprocess import Popen, PIPE
from mimetypes import guess_type
from re import compile as regexp_compile, DOTALL, escape

import cld
import magic

from pypln.backend.celery_task import PyPLNTask


# Any markup tag: group 1 is the complete tag, group 2 the tag name
# (with its leading "/" for closing tags).
regexp_tags = regexp_compile(
    r'(<[ \t]*([a-zA-Z0-9!"./_-]*)[^>]*>)',
    DOTALL,
)
# A comment, possibly spanning several lines.
regexp_comment = regexp_compile(r'<!--.*?-->', DOTALL)
# A run of newlines (captured) followed by indentation.
regexp_spaces_start = regexp_compile('([\n]+)[ \t]*', DOTALL)
# Spaces and tabs right before a newline.
regexp_spaces_end = regexp_compile('[ \t]*\n', DOTALL)
# Three or more consecutive newlines.
regexp_newlines = regexp_compile('[\n]{3,}', DOTALL)
# Two or more consecutive spaces/tabs.
regexp_spaces = regexp_compile('[ \t]{2,}', DOTALL)
# Optional spaces/tabs followed by one punctuation mark (captured).
regexp_punctuation = regexp_compile(
    '[ \t]*([{}])'.format(escape('!,.:;?')),
    DOTALL,
)
# Tag names (as captured by group 2 of regexp_tags) listed as line breaks.
breakline_tags = [
    'table', '/table',
    'tr',
    'div', '/div',
    'h1', '/h1',
    'h2', '/h2',
    'h3', '/h3',
    'h4', '/h4',
    'h5', '/h5',
    'h6', '/h6',
    'br', 'br/',
]
# Tag names listed as double line breaks.
double_breakline = ['table', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']

def clean(text):
    text = regexp_spaces_start.sub(r'\1', text)

예제 #3

0

파일 보기

파일: nubank.py 프로젝트: turicas/nubank-to-csv

# coding: utf-8

from __future__ import unicode_literals

import argparse
import datetime

from collections import OrderedDict
from decimal import Decimal
from re import compile as regexp_compile

import rows

from lxml.etree import HTML

# A whole string of the form "<digits> de <digits>".
REGEXP_PAGE = regexp_compile(
    r'^[0-9]+'
    r' de '
    r'[0-9]+$'
)
# Space-separated month abbreviations, January first.
MONTHS = ' '.join((
    'JAN', 'FEV', 'MAR', 'ABR', 'MAI', 'JUN',
    'JUL', 'AGO', 'SET', 'OUT', 'NOV', 'DEZ',
))
# Column names, in order, mapped to the ``rows`` field type of each.
FIELDS = OrderedDict((
    ('category', rows.fields.TextField),
    ('description', rows.fields.TextField),
    ('value', rows.fields.DecimalField),
    ('date', rows.fields.DateField),
))


def partition(data, number):
    """Yield consecutive slices of ``data`` holding up to ``number`` items.

    The last slice is shorter when ``len(data)`` is not a multiple of
    ``number``.
    """
    total = len(data)
    for start in range(0, total, number):
        stop = start + number
        yield data[start:stop]


def convert_text(text):
    """Return ``text`` with every U+00A0 character replaced by a space."""
    non_breaking_space = '\xa0'
    plain_space = ' '
    return text.replace(non_breaking_space, plain_space)

예제 #4

0

파일 보기

#!/usr/bin/env python
# coding: utf-8

import argparse
import sys
import time

from datetime import timedelta
from re import compile as regexp_compile
from tempfile import TemporaryFile

import pymongo

from pypln.api import PyPLN

# Splits "<a>:<b>/<c>/<rest>" into four groups.
regexp_mongodb = regexp_compile(
    r'([^:]+)'    # group 1: everything before the first colon
    r':([^/]+)'   # group 2: up to the first slash
    r'/([^/]+)'   # group 3: up to the next slash
    r'/(.+)'      # group 4: whatever is left
)


def partition(iterator, n):
    """Yield successive lists of up to ``n`` items taken from ``iterator``.

    Every yielded list holds exactly ``n`` items, except possibly the
    last one, which holds whatever was left.  Nothing is yielded for an
    empty input or when ``n`` is not positive.

    Args:
        iterator: any iterable; it is consumed lazily
        n: maximum number of items per yielded list
    """
    iterator = iter(iterator)
    while True:
        values = []
        for _ in range(n):
            try:
                # The next() builtin works on Python 2.6+ and Python 3,
                # unlike the Python 2 only ".next()" method.
                values.append(next(iterator))
            except StopIteration:
                # Stop asking an exhausted iterator for more items.
                break
        if not values:
            return
        yield values
        if len(values) < n:
            # A short chunk means the source ran dry while filling it.
            return

예제 #5

0

파일 보기

파일: check_mysql_processes.py 프로젝트: konstantinbest/igmonplugins

class Check:
    """One threshold specification parsed from a textual argument

    The argument is matched against ``pattern``, which accepts, in this
    order and all optional: a count with an optional "%" unit, an
    "in transaction" clause with its own "for <time>" and "at <state>"
    parts, an "on <command>" clause, a "for <time>" clause and an
    "at <state>" clause.
    """

    # Raw strings keep the regex escapes (\s, \A, \Z) intact; in plain
    # string literals they are invalid escape sequences.  Every item is
    # a separate list element so that whitespace is allowed between all
    # of them.
    pattern = regexp_compile(r'\s*'.join([  # Allow spaces between everything
        r'\A',
        '(?:',                              # Count clause
        '(?P<count_number>[0-9]+)',
        '(?P<count_unit>%?)',
        ')?',
        '(?:in',                            # Transaction after separator
        '(?P<txn>transaction)',
        '(?:for',                           # Time clause after separator
        '(?P<txn_time_number>[0-9]+)',
        '(?P<txn_time_unit>{time_units})?',
        ')?',
        '(?:at',                            # State after separator
        '(?P<txn_state>[a-z ]+?)',
        ')?',
        ')?',
        '(?:on',                            # Command after separator
        '(?P<command>[a-z ]+?)',
        ')?',
        '(?:for',                           # Time clause after separator
        '(?P<command_time_number>[0-9]+)',
        '(?P<command_time_unit>{time_units})?',
        ')?',
        '(?:at',                            # State after separator
        '(?P<command_state>[a-z ]+?)',
        ')?',
        r'\Z',
    ]).format(
        time_units='|'.join(k for k, v in Interval.units)
    ))

    def __init__(self, arg):
        """Parse ``arg`` and store its components as attributes

        A missing count defaults to 1, missing times default to 0 and
        missing time units default to the first entry of
        ``Interval.units``.

        Raises:
            ArgumentTypeError: if ``arg`` does not match ``pattern``
        """
        matches = self.pattern.match(arg)
        if not matches:
            raise ArgumentTypeError('"{}" cannot be parsed'.format(arg))
        self.count_number = int(matches.group('count_number') or 1)
        self.count_unit = matches.group('count_unit')
        self.txn = matches.group('txn')
        self.txn_time = Interval(
            int(matches.group('txn_time_number') or 0),
            matches.group('txn_time_unit') or Interval.units[0][0],
        )
        self.txn_state = matches.group('txn_state')
        self.command = matches.group('command')
        self.command_state = matches.group('command_state')
        self.command_time = Interval(
            int(matches.group('command_time_number') or 0),
            matches.group('command_time_unit') or Interval.units[0][0],
        )

    def __repr__(self):
        """Return the string form wrapped in single quotes"""
        return "'{}'".format(self.__str__())

    def __str__(self):
        """Return the count, its unit and the specification clauses"""
        return str(self.count_number) + self.count_unit + self.get_spec_str()

    def get_spec_str(self):
        """Return the clauses which are set, each with a leading space"""
        spec = ''
        if self.txn:
            spec += ' in {}'.format(self.txn)
        if self.txn_time:
            spec += ' for {}'.format(self.txn_time)
        if self.txn_state:
            spec += ' at {}'.format(self.txn_state)
        if self.command:
            spec += ' on {}'.format(self.command)
        if self.command_time:
            spec += ' for {}'.format(self.command_time)
        if self.command_state:
            spec += ' at {}'.format(self.command_state)
        return spec

    def relative(self):
        """Return True if the count has a unit (the pattern allows "%")"""
        return bool(self.count_unit)

    def get_problem(self, db):
        """Count the matching processes and report if the limit is reached

        Returns:
            The formatted problem when the number of matching processes
            is at least the count limit, None otherwise.
        """
        count = 0
        for process in db.get_processes():
            if process['time'] < int(self.command_time):
                # NOTE(review): stopping here presumably relies on
                # db.get_processes() returning the longest running
                # processes first -- confirm against its implementation.
                if not self.txn_time:
                    break
                continue
            if self.fail_command(process):
                continue
            if self.txn and self.fail_txn(process, db):
                continue
            count += 1

        if count >= self.get_count_limit(db):
            return self.format_problem(count)
        return None

    def fail_command(self, process):
        """Return True if the process fails the command or state clause"""
        # Command time is checked by the caller.
        if self.command and process['command'].lower() != self.command:
            return True
        if self.command_state:
            if not process['state'].lower().startswith(self.command_state):
                return True
        return False

    def fail_txn(self, process, db):
        """Return True if the process fails the transaction clauses

        A process for which ``db.get_txn()`` returns nothing fails too.
        """
        txn_info = db.get_txn(process['id'])
        if not txn_info:
            return True
        if txn_info['seconds'] < int(self.txn_time):
            return True
        if self.txn_state:
            if not txn_info['state'].lower().startswith(self.txn_state):
                return True
        return False

    def get_count_limit(self, db):
        """Return the count to reach, absolute or relative

        A relative count is taken as a percentage of
        ``db.get_max_connections()``.
        """
        if not self.relative():
            return self.count_number
        return self.count_number * db.get_max_connections() / 100.0

    def format_problem(self, count):
        """Return the message describing ``count`` matching processes"""
        problem = '{} processes{}'.format(count, self.get_spec_str())
        # The limit is only worth mentioning when it is not the default.
        if self.count_number > 1 or self.count_unit:
            problem += ' exceeds ' + str(self.count_number) + self.count_unit
        return problem

예제 #6

0

파일 보기

import os

from collections import namedtuple, OrderedDict
from io import BytesIO
from pathlib import Path
from re import compile as regexp_compile

import requests
import requests_cache
import rows
import rows.utils

# Pages on sports-reference.com; URL_DATA is a format string taking
# "country_code" and "year".
URL_YEARS = 'http://www.sports-reference.com/olympics/summer/'
URL_COUNTRIES = 'http://www.sports-reference.com/olympics/countries/'
URL_DATA = URL_COUNTRIES + '{country_code}/summer/{year}/'
# A link to a country page: group 1 is the three-letter code, group 2
# the link text.
REGEXP_COUNTRY = regexp_compile(
    r'/olympics/countries/([A-Z]{3})/">'
    r'([^<]+)<'
)
# Column names, in order, mapped to the ``rows`` field type of each.
FIELDS = OrderedDict((
    ('rk', rows.fields.IntegerField),
    ('athlete', rows.fields.TextField),
    ('gender', rows.fields.TextField),
    ('age', rows.fields.IntegerField),
    ('sport', rows.fields.TextField),
    ('gold', rows.fields.IntegerField),
    ('silver', rows.fields.IntegerField),
    ('bronze', rows.fields.IntegerField),
    ('total', rows.fields.IntegerField),
))
FULL_FIELDS = OrderedDict([
    ('year', rows.fields.IntegerField),
    ('country_code', rows.fields.TextField),
    ('country_name', rows.fields.TextField),

예제 #7

0

파일 보기

파일: ods_draft.py 프로젝트: tilacog/rows

# coding: utf-8

import HTMLParser
import zipfile

from re import compile as regexp_compile, DOTALL
from unicodedata import normalize


# Module-level parser instance, created once at import time.  The
# ``HTMLParser`` module name is the Python 2 one.
html_parser = HTMLParser.HTMLParser()
# Any markup tag.
regexp_tags = regexp_compile(r'<[ \t]*[a-zA-Z0-9!"./_-]*[^>]*>', DOTALL)
# A comment, possibly spanning several lines.
regexp_comment = regexp_compile(r'<!--.*?-->', DOTALL)

# The three patterns below capture the opening tag, the (shortest
# possible) content and the closing tag of an ODS element.
regexp_ods_table = regexp_compile(
    r'(<table:table [^>]*>)(.*?)(</table:table>)',
    DOTALL,
)
regexp_ods_table_row = regexp_compile(
    r'(<table:table-row[^>]*>)(.*?)(</table:table-row>)',
    DOTALL,
)
regexp_ods_table_cell = regexp_compile(
    r'(<table:table-cell[^>]*>)(.*?)(</table:table-cell>)',
    DOTALL,
)

# TODO: encoding?
# TODO: replace &...;
# TODO: name/id of tables
# TODO: re.MULTILINE
# TODO: identify types
# TODO: clear empty rows?
# TODO: clear non-table rows?


def tables_ods(filename, headers=False, strip_xml=True):

예제 #8

0

파일 보기

파일: check_process_list.py 프로젝트: konstantinbest/igmonplugins

# THE SOFTWARE.

from argparse import ArgumentParser, RawTextHelpFormatter
from collections import defaultdict
from datetime import datetime, timedelta
from operator import itemgetter
from os.path import isfile
from re import compile as regexp_compile
from subprocess import Popen, PIPE
from sys import exit

# The option arguments which accept a check
CHECK_ARGS = ['match', 'parent', 'exclude', 'warning', 'critical']

# Matches "[[<days>(-| day[s][,] )]<hours>:]<minutes>:<seconds>[.<fraction>]".
# Raw strings keep the regex escapes (\A, \-, \., \Z) intact; in plain
# string literals they are invalid escape sequences.
TIMEDELTA_PATTERN = regexp_compile(r'\A(((?P<days>[0-9]+)(\-| *days?,? *))?'
                                   r'((?P<hours>[0-9]+):))?'
                                   r'(?P<minutes>[0-9]+):'
                                   r'(?P<seconds>[0-9]+(\.[0-9]+)?)\Z')


def main():
    """The main program

    This function puts together everything.  It parses the arguments,
    runs the tests, prints the results and exits with a Nagios compatible
    exit code.
    """
    args = parse_args()
    columns = ['pid', 'command']
    for arg_name in CHECK_ARGS:
        for check in getattr(args, arg_name):
            if check.var not in columns:

예제 #9

0

파일 보기

파일: check_process_list.py 프로젝트: lacunoc/igmonplugins

from argparse import ArgumentParser, RawTextHelpFormatter
from collections import defaultdict
from datetime import datetime, timedelta
from operator import itemgetter
from os.path import isfile
from re import compile as regexp_compile
from subprocess import Popen, PIPE
from sys import exit

# The option arguments which accept a check
CHECK_ARGS = ['match', 'parent', 'exclude', 'warning', 'critical']

# Matches "[[<days>(-| day[s][,] )]<hours>:]<minutes>:<seconds>[.<fraction>]".
# Raw strings keep the regex escapes (\A, \-, \., \Z) intact; in plain
# string literals they are invalid escape sequences.
TIMEDELTA_PATTERN = regexp_compile(
    r'\A(((?P<days>[0-9]+)(\-| *days?,? *))?'
    r'((?P<hours>[0-9]+):))?'
    r'(?P<minutes>[0-9]+):'
    r'(?P<seconds>[0-9]+(\.[0-9]+)?)\Z'
)


def main():
    """The main program

    This function puts together everything.  It parses the arguments,
    runs the tests, prints the results and exits with a Nagios compatible
    exit code.
    """
    args = parse_args()
    columns = ['pid', 'command']
    for arg_name in CHECK_ARGS:
        for check in getattr(args, arg_name):

예제 #10

0

파일 보기

파일: nubank.py 프로젝트: turicas/nubank-to-csv

from __future__ import unicode_literals

import argparse
import datetime

from collections import OrderedDict
from decimal import Decimal
from re import compile as regexp_compile

import rows

from lxml.etree import HTML


# A whole string of the form "<digits> de <digits>".
REGEXP_PAGE = regexp_compile(
    r'^[0-9]+'
    r' de '
    r'[0-9]+$'
)
# Space-separated month abbreviations, January first.
MONTHS = ' '.join((
    'JAN', 'FEV', 'MAR', 'ABR', 'MAI', 'JUN',
    'JUL', 'AGO', 'SET', 'OUT', 'NOV', 'DEZ',
))
# Column names, in order, mapped to the ``rows`` field type of each.
FIELDS = OrderedDict((
    ('category', rows.fields.TextField),
    ('description', rows.fields.TextField),
    ('value', rows.fields.DecimalField),
    ('date', rows.fields.DateField),
))


def partition(data, number):
    """Yield consecutive slices of ``data`` holding up to ``number`` items.

    The last slice is shorter when ``len(data)`` is not a multiple of
    ``number``.
    """
    total = len(data)
    for start in range(0, total, number):
        stop = start + number
        yield data[start:stop]


def convert_text(text):
    """Return ``text`` with every U+00A0 character replaced by a space."""
    non_breaking_space = '\xa0'
    plain_space = ' '
    return text.replace(non_breaking_space, plain_space)

예제 #11

0

파일 보기

# Code point of the ``bson.BSONDAT`` marker as an integer.
# NOTE(review): presumably the BSON type tag for dates -- confirm
# against the installed pymongo/bson version.
BSON_DATE = ord(bson.BSONDAT) # WTF, pymongo?
# Lower-case three-letter month abbreviations mapped to month numbers:
# Portuguese first, then the English ones which differ from them.
MONTHS = {'jan': 1, 'fev': 2, 'mar': 3, 'abr': 4,  'mai': 5,  'jun': 6,
          'jul': 7, 'ago': 8, 'set': 9, 'out': 10, 'nov': 11, 'dez': 12,

          'feb': 2, 'apr': 4, 'may': 5, 'aug': 8,  'sep': 9,  'oct': 10,
          'dec': 12}
# Lower-case full month names mapped to month numbers: Portuguese first,
# then English.  "february" used to be misspelled, which left the
# correctly spelled name out of the table.
FULL_MONTHS = {'janeiro': 1,   'fevereiro': 2, u'março': 3,    'abril': 4,
               'maio': 5,      'junho': 6,     'julho': 7,     'agosto': 8,
               'setembro': 9,  'outubro': 10,  'novembro': 11, 'dezembro': 12,

               'january': 1,   'february': 2,  'march': 3,     'april': 4,
               'may': 5,       'june': 6,      'july': 7,      'august': 8,
               'september': 9, 'october': 10,  'november': 11, 'december': 12,}
# "<date>t<time><offset>" with a lower-case "t" separator; the three
# parts are captured separately.
regexp_almost_iso_date = regexp_compile(
    r'([0-9]{4}-[0-9]{2}-[0-9]{2})'    # group 1: YYYY-MM-DD shaped
    r't'
    r'([0-9]{2}:[0-9]{2}:[0-9]{2})'    # group 2: HH:MM:SS shaped
    r'([+-]+[0-9:]*)'                  # group 3: sign(s), digits and colons
)


def get_offset_datetime(offset):
    """Convert a "<sign>HHMM" offset string into a ``datetime.timedelta``.

    The string "gmt", in any letter case, is treated as "+0000".  The
    first character is read as the sign, characters 1-2 as hours and
    characters 3-4 as minutes.
    """
    if offset.lower() == 'gmt':
        offset = '+0000'
    # "+1" or "-1", depending on the leading sign character
    sign = int(offset[0] + '1')
    hours = int(offset[1:3])
    minutes = int(offset[3:5])
    seconds = sign * (hours * 3600 + minutes * 60)
    # timedelta's first positional argument is a number of days
    return datetime.timedelta(seconds / (3600.0 * 24))


def parse_pt_date(date_string):

예제 #12

0

파일 보기

import sys
import time

from os import path, walk
from re import compile as regexp_compile, DOTALL

import pymongo

# Layout of one "<doc>" record as a regular expression, one item per
# line of the record: the header line (capturing id and url), a line
# captured whole, an empty line, the captured body and the closing tag.
doc_format = '\n'.join([
    '<doc id="([^"]+)" url="([^"]+)" title="[^"]+">',
    '([^\n]+)',
    '',
    '(.*)',
    '</doc>',
])

doc_regexp = regexp_compile(doc_format, DOTALL)
# Names given to the four groups of doc_regexp, in order.
doc_fields = ('id', 'url', 'title', 'text')
# Splits "<a>:<b>/<c>/<rest>" into four groups.
regexp_mongodb = regexp_compile(
    r'([^:]+)'
    r':([^/]+)'
    r'/([^/]+)'
    r'/(.+)'
)


def parse_doc(text):
    '''Parse one "<doc>...</doc>" record and return its fields as a dict

    Only the first match of ``doc_regexp`` is used.  Its groups are
    named after ``doc_fields``, and the "text" entry is stripped of
    "</ref>" and "</math>" markers and of surrounding whitespace.
    '''
    first_match = doc_regexp.findall(text)[0]
    page = dict(zip(doc_fields, first_match))
    body = page['text']
    for closing_tag in ('</ref>', '</math>'):
        body = body.replace(closing_tag, '')
    page['text'] = body.strip()
    return page


def parse_docs(raw_text):
    '''Given a string with "<doc>...</doc>"s, return a list of dicts'''

예제 #13

0

파일 보기

파일: wiki.py 프로젝트: dickon/wiki

"""

from json import loads, dumps
from os.path import join, isfile, isdir, join
from tempfile import gettempdir
from os import listdir, makedirs, rename
from re import compile as regexp_compile
from time import time
from functools import wraps
from flask import Flask, request, abort, Response, jsonify

APP = Flask(__name__)
# Default storage root: a "wikidata" directory under the system
# temporary directory.
APP.config.update({"ROOT": join(gettempdir(), "wikidata")})
# Settings from the file named by the WIKI_SETTINGS environment variable
# are loaded afterwards; silent=True makes a missing variable acceptable.
APP.config.from_envvar('WIKI_SETTINGS', silent=True)

# Both patterns are anchored at the end only; whether the start is
# anchored too depends on the caller using match() or search().

# 1 to 50 ASCII letters or digits.
DOCUMENT_TITLE_REGEXP = regexp_compile(
    r"[A-Za-z0-9]{1,50}$"
)
# Digits with an optional fractional part.
TIMESTAMP_REGEXP = regexp_compile(
    r"\d+(\.\d+)?$"
)

# library functions

def get_version_directories(title):
    """Return a list of version strings for a page:

    Args:
       title (str): page title, assumed to be verified

    Returns:
       List[str]: list of timestamps in string form, sorted in
                  floating point numeric order
    """
    page_directory = join(APP.config['ROOT'], title)

예제 #14

0

파일 보기

#!/usr/bin/env python
# coding: utf-8

import glob
import os
import sys

from collections import defaultdict
from re import compile as regexp_compile

# A "Job finished" message: group 1 is the id, group 2 the worker.
regexp_finished = regexp_compile(
    r'Job finished: id=([a-f0-9]+), worker=([a-zA-Z0-9]+)'
)

# A "Request to router" message: the group is the "{...}" part, which
# may not contain a newline.
regexp_job_duration = regexp_compile(
    r'\[API\] Request to router: '
    r'({[^\n]+})'
)


def parse_log(filename):
    with open(filename) as fobj:
        contents = fobj.read()

    ids_and_worker_names = regexp_finished.findall(contents)
    job_ids = defaultdict(list)
    map(lambda x: job_ids[x[1]].append(x[0]), ids_and_worker_names)

    job_durations = {}
    for raw_message in regexp_job_duration.findall(contents):
        if 'job finished' in raw_message:
            instruction = 'data = {}'.format(raw_message)
            namespace = {}
            exec instruction in namespace
            data = namespace['data']

예제 #15

0

파일 보기

파일: extractor.py 프로젝트: pombredanne/pypln.backend

import shlex

from HTMLParser import HTMLParser
from tempfile import NamedTemporaryFile
from os import unlink
from subprocess import Popen, PIPE
from mimetypes import guess_type
from re import compile as regexp_compile, DOTALL, escape

import cld
import magic

from pypln.backend.celery_task import PyPLNTask

# Any markup tag: group 1 is the complete tag, group 2 the tag name
# (with its leading "/" for closing tags).
regexp_tags = regexp_compile(
    r'(<[ \t]*([a-zA-Z0-9!"./_-]*)[^>]*>)',
    DOTALL,
)
# A comment, possibly spanning several lines.
regexp_comment = regexp_compile(r'<!--.*?-->', DOTALL)
# A run of newlines (captured) followed by indentation.
regexp_spaces_start = regexp_compile('([\n]+)[ \t]*', DOTALL)
# Spaces and tabs right before a newline.
regexp_spaces_end = regexp_compile('[ \t]*\n', DOTALL)
# Three or more consecutive newlines.
regexp_newlines = regexp_compile('[\n]{3,}', DOTALL)
# Two or more consecutive spaces/tabs.
regexp_spaces = regexp_compile('[ \t]{2,}', DOTALL)
# Optional spaces/tabs followed by one punctuation mark (captured).
regexp_punctuation = regexp_compile(
    '[ \t]*([{}])'.format(escape('!,.:;?')),
    DOTALL,
)
# Tag names (as captured by group 2 of regexp_tags) listed as line breaks.
breakline_tags = [
    'table', '/table',
    'tr',
    'div', '/div',
    'h1', '/h1',
    'h2', '/h2',
    'h3', '/h3',
    'h4', '/h4',
    'h5', '/h5',
    'h6', '/h6',
    'br', 'br/',
]
# Tag names listed as double line breaks.
double_breakline = ['table', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']


def clean(text):