def get():
    """Detect the clang toolchain installed on this machine.

    Returns a populated Compiler when `clang -dumpversion` succeeds,
    otherwise an unavailable placeholder Compiler("clang").
    """
    if not Shell.check(["clang", "-dumpversion"]):
        # clang is not on PATH: report an unavailable compiler.
        return Compiler("clang")
    full_version = Regex.version(Shell.get(["clang", "-v"]))
    major_version = Regex.first_number(full_version)
    # macOS clang >= 10 has a two-digit major, so keep one extra char.
    digits = 4 if (System.is_mac and major_version > 9) else 3
    conan_version = full_version[:digits]
    conan_name = "apple-clang" if (System.is_mac and not Args.android) else "clang"
    return Compiler(name="clang",
                    is_available=True,
                    libcxx="libc++",
                    conan_name=conan_name,
                    full_version=full_version,
                    major_version=major_version,
                    conan_version=conan_version,
                    CC="clang",
                    CXX="clang++")
def get_literal(character, is_case_insensitive):
    """Build a Regex.Literal that matches a single character.

    When case-insensitive matching is requested (and not globally turned
    off via `suppress_case_insensitive` — presumably an enclosing-scope
    flag; confirm), a cased character matches either of its variants.
    """
    if is_case_insensitive and not suppress_case_insensitive:
        lower, upper = character.lower(), character.upper()
        if lower != upper:
            # Cased character: accept both the lower- and uppercase form.
            return Regex.Literal([(ord(lower), ord(lower)),
                                  (ord(upper), ord(upper))])
    code = ord(character)
    return Regex.Literal([(code, code)])
def _parse_opening_tag_match(self, opening_tag_match, text):
    # Given the regex match of an opening tag, scan forward through
    # `text` to locate the matching closing tag (honouring nesting of
    # the same tag name), build the element's content from everything
    # between them, and return (Element, index just past the closer).
    tag_closed_count = 1  # one unbalanced opening tag so far
    end_pos = opening_tag_match.end()
    tag_dict = opening_tag_match.groupdict()
    tag, attributes = tag_dict["tag"], tag_dict["attributes"]
    # Patterns specialised to this tag name.
    opening_tag_p = Regex.OPENING_TAG_P_FORMAT.format(tag=tag)
    closing_tag_p = Regex.CLOSING_TAG_P_FORMAT.format(tag=tag)
    while tag_closed_count != 0:
        opening_tag = re.compile(opening_tag_p).search(text, end_pos)
        closing_tag = re.compile(closing_tag_p).search(text, end_pos)
        # Whichever tag occurs first decides how nesting depth changes.
        nearest_match_tag = Regex.nearest_match(opening_tag, closing_tag)
        if nearest_match_tag is None:
            # Ran out of text before the tag was balanced.
            raise Exceptions.NoMatchingClosingTagError(
                tag_dict["opening_tag"])
        elif nearest_match_tag == opening_tag:
            tag_closed_count += 1  # nested opening of the same tag
        elif nearest_match_tag == closing_tag:
            tag_closed_count -= 1
        end_pos = nearest_match_tag.end()
    # Loop exits on a closing match, so `closing_tag` is the balancer.
    content = self._create_content(
        text[opening_tag_match.end():closing_tag.start()])
    element = Element.Element(tag, content, attributes)
    return element, end_pos
def ttest_find_rparen(self):
    # Disabled test (ttest_ prefix): Regex.find_rParen should return the
    # index of the parenthesis matching the one just before `start`.
    cases = [
        ("abc(hd)df", 4, 6),
        ("abc(h(d)d)f", 4, 9),
        ("(ab()ch(d)d)f", 1, 11),
        ("(ab(())()((()))ch(d)d)f", 1, 21),
    ]
    for pattern, start, expected in cases:
        self.assertEqual(Regex.find_rParen(pattern, start), expected)
def parse_character_class(self):
    """
    Parse a character class ([...]) expression from the string at its
    current index.

    @return: a Regex.Literal object representing the characters
    """
    characters = self.parse_character_class_expression()
    # list(...) copies the iterable directly; the previous
    # `[i for i in characters]` comprehension was a needless manual copy.
    return Regex.Literal(list(characters))
def ttest_find_rparen(self):
    """Disabled (ttest_ prefix): checks matching-right-paren lookup."""
    check = self.assertEqual
    check(Regex.find_rParen("abc(hd)df", 4), 6)
    check(Regex.find_rParen("abc(h(d)d)f", 4), 9)
    check(Regex.find_rParen("(ab()ch(d)d)f", 1), 11)
    check(Regex.find_rParen("(ab(())()((()))ch(d)d)f", 1), 21)
def home_handle(home_url, dic):
    """Fetch the blog index at `home_url` and parse each valid post URL into `dic`."""
    for url in BeautifulSoup.home_parse(home_url):
        if not Regex.url_judge(url):
            continue  # skip URLs that fail validation
        print("Begin handling:\t", url)
        str_html = splider_test.splider(url)
        BeautifulSoup.parse(str_html, dic)
def ttest_find_nextGroup(self): data = {'a':'a', 'ab':'ab', 'abc':'abc', 'a(':'a', 'ab(':'ab', 'abc(':'abc', 'a|':'a', 'ab|':'ab', 'abc|':'abc', 'a*':'a', 'ab*':'a', 'abc*':'ab'} for k, v in data.items(): r = Regex.find_nextGroup(k, 0) print k, v, r, k[0:r] self.assertEqual(k[0:r], v)
def home_parse(url):
    """Return the list of post URLs extracted from the blog index page at `url`."""
    str_html = splider_test.splider(url)
    soup = BeautifulSoup(str_html, 'lxml')
    headings = soup.find_all(attrs={'class': 'am-list-item-hd'})
    # url_match pulls the link out of each heading's markup.
    return [Regex.url_match(str(item)) for item in headings]
def parse_variable(self):
    """
    Parse a variable instance from the string at its current index.

    @return: a Regex.Variable object representing the variable instance
    """
    name_chars = []
    # Variable names consist of letters and '.'; stop at anything else.
    while self.next_is(string.ascii_letters + '.'):
        name_chars.append(self.get_next())
    self.expect("}")
    return Regex.Variable(''.join(name_chars))
def parse_qualified(self):
    """
    Parse the string from its current index into a repetition or an
    expression that can be contained by a repetition.

    @return: a visitable regular expression object from the Regex package.
    """
    child = self.parse_character()
    if self.get_next_if(u'*'):
        return Regex.Repetition(child, 0, Regex.Repetition.Infinity)
    if self.get_next_if(u'+'):
        return Regex.Repetition(child, 1, Regex.Repetition.Infinity)
    if self.get_next_if(u'?'):
        return Regex.Repetition(child, 0, 1)
    # A '{' may begin either {min,max} or a {variable}; peek one extra
    # character to rule out a variable name before consuming the brace.
    if self.next_is(u'{') and self.nth_next_is_not(
            2, string.ascii_letters + '.'):
        self.get_next()
        return self.parse_repetition(child)
    return child
def classificarBufferComentarioBloco(self, numero_linha, buffer):
    """Validate a block-comment buffer; return an error record or ''.

    On a malformed block comment, sets self.teve_erro and returns a
    '<line> CoMF <buffer>' record; otherwise returns the empty string.
    """
    self.buffer_comentario_bloco = ""
    resultado = Regex.Regex().identificaComentarioBloco(buffer)
    if resultado:
        return ""
    # Malformed block comment: flag the error and report it.
    self.teve_erro = True
    return f"{numero_linha} CoMF {buffer}"
def __init__(self):
    # Initialise per-run interpreter state. Attribute names are
    # Portuguese (instrucaoAtual = current instruction, pilhaBloco =
    # block stack, contInteracoes = iteration counter).
    self.regex = Regex.Regex()        # token/lexeme validation helpers
    self.outLine = Output.Output()    # output formatting helper
    self.instrucaoAtual = None
    self.paramBloco = []
    self.blocosCod = []
    self.estadoAtual = ''
    self.listaDePrints = []
    self.contInteracoes = 0
    self.blocoAtual = None
    self.fita = Ribbon.Ribbon()       # Ribbon ~ the machine's tape
    self.pilhaBloco = []
def parse_concatenation(self):
    """
    Parse the string from its current index into a concatenation or an
    expression that can be contained by a concatenation.

    @return: a visitable regular expression object from the Regex package.
    """
    parts = [self.parse_qualified()]
    # Keep consuming qualified terms until an alternation or group boundary.
    while self.next_is_not(u'|)'):
        parts.append(self.parse_qualified())
    # A single term needs no Concatenation wrapper.
    return Regex.Concatenation(parts) if len(parts) > 1 else parts[0]
def parse_alternation(self):
    """
    Parse the string from its current index into an alternation or an
    expression that can be contained by an alternation.

    @return: a visitable regular expression object from the Regex package.
    """
    branches = [self.parse_concatenation()]
    while self.get_next_if(u'|'):
        branches.append(self.parse_concatenation())
    if len(branches) == 1:
        # A single branch needs no Alternation wrapper.
        return branches[0]
    return Regex.Alternation(branches)
def doRegex():
    """Request handler: flag fetched tweets against a spam-indicator regex.

    Reads a JSON payload from the 'hasil' form field; fetches tweets by
    username (search_type "1") or by region, tests each entry of
    tweets['full_text'] against the pattern, stores the booleans under
    'is_spam', and returns the whole structure as a JSON string.
    """
    # json.loads is the idiomatic spelling of json.JSONDecoder().decode;
    # the old `tweets = dict()` placeholder was immediately overwritten.
    data = json.loads(request.form['hasil'])
    pattern = Regex(data['spam_indicator'])
    count = data['count']
    if data['search_type'] == "1":
        tweets = api.search_timeline(data['username'], count)
    else:
        tweets = api.search_region(data['region'], count)
    tweets['is_spam'] = [pattern.is_match(tweet) for tweet in tweets['full_text']]
    return json.dumps(tweets)
def get():
    """Detect an installed, supported gcc toolchain.

    Prefers the default `gcc` on PATH when its major version is
    supported; otherwise probes the versioned binaries gcc-7..gcc-10.
    Returns an unavailable Compiler("gcc") placeholder when nothing
    usable is found.
    """
    if not Shell.check(["gcc", "-dumpversion"]):
        return Compiler("gcc")
    supported_versions = [7, 8, 9, 10]
    default_version = Shell.get(["gcc", "-dumpversion"])
    default_major_version = Regex.first_number(default_version)
    if default_major_version in supported_versions:
        return Compiler(name="gcc",
                        is_available=True,
                        full_version=default_version,
                        major_version=default_major_version,
                        conan_version=default_version[:3],
                        # fix: CC was omitted in this branch only, unlike
                        # the versioned branch below and the clang probe.
                        CC="gcc",
                        CXX="g++")
    # Default gcc unsupported: probe explicitly versioned binaries.
    version = next((v for v in supported_versions
                    if Shell.check(["gcc-" + str(v), "-dumpversion"])), None)
    if not version:
        return Compiler("gcc")
    full_version = Shell.get(["gcc-" + str(version), "-dumpversion"])
    major_version = Regex.first_number(full_version)
    return Compiler(name="gcc",
                    is_available=True,
                    full_version=full_version,
                    # fix: reuse major_version instead of re-running
                    # Regex.first_number on the same string.
                    major_version=major_version,
                    conan_version=full_version[:3],
                    CC="gcc-" + str(version),
                    CXX="g++-" + str(version))
def test_accept(self): # pattern='(a|b)*abb' # ls=['abb','ababb','vabb','abbabababb','abba'] # pattern='(a|b)*(aa|bb)(a|b)* ' # ls=['abb','ababb','aabb','ababa','aababa'] # pattern = '(1*01*0)*1*' # ls = ['00', '', '10111', '10001', '100100', '0101010', '1010101'] # pattern = '(a|b)*aa|bb' # ls = ['bb', '', 'a', 'aa', 'aaa', 'aaaa', 'b', 'abb', 'bbb', 'bbbb', 'ababa', 'abaab'] pattern = 'abc*' ls = ['ab', '', 'aa', 'abc', 'abcc', 'abccc'] for i in ls: print i, Regex.accept(pattern, i)
def test_accept(self): # pattern='(a|b)*abb' # ls=['abb','ababb','vabb','abbabababb','abba'] # pattern='(a|b)*(aa|bb)(a|b)* ' # ls=['abb','ababb','aabb','ababa','aababa'] # pattern = '(1*01*0)*1*' # ls = ['00', '', '10111', '10001', '100100', '0101010', '1010101'] # pattern = '(a|b)*aa|bb' # ls = ['bb', '', 'a', 'aa', 'aaa', 'aaaa', 'b', 'abb', 'bbb', 'bbbb', 'ababa', 'abaab'] pattern = 'abc*' ls = ['ab', '', 'aa', 'abc', 'abcc', 'abccc'] for i in ls: print i,Regex.accept(pattern, i)
def __init__(self, rules, strict=False, ignore=None,
             charset=frozenset(map(chr, range(128)))):
    """Build a lexer by merging the DFA of every rule.

    @param rules:   rule objects exposing .value and .isRegex
    @param strict:  stored as-is for use elsewhere
    @param ignore:  tokens to ignore; defaults to a fresh empty list.
                    (Fix: the old `ignore=[]` was a shared mutable
                    default argument.)
    @param charset: the alphabet, default 7-bit ASCII (frozenset is
                    immutable, so a shared default is safe here)
    """
    self.rules = rules
    self.strict = strict
    self.ignore = [] if ignore is None else ignore
    self.charset = charset
    self.ldfa = None
    # NOTE(review): assumes `rules` is non-empty; with no rules,
    # self.ldfa stays None and minimize() below would fail (unchanged
    # from the original behavior).
    for rule in self.rules:
        # Literal rules are escaped character-by-character so the regex
        # engine treats their value verbatim.
        value = rule.value if rule.isRegex else "\\" + "\\".join(rule.value)
        ldfa = Regex.regexToLDFA(value, self.charset)
        if self.ldfa is None:
            self.ldfa = ldfa
        else:
            self.ldfa.merge(ldfa)
    self.ldfa.minimize()
def parse_repetition(self, child):
    """
    Parse a {min, max} expression from the string at its current index.

    @param child: a visitable object from the Regex package containing
                  the repeated expression.
    @return: a Regex.Repetition object representing the repetition.
    @raise RegexParserExceptionInternal: if min exceeds max.
    """
    minimum = self.parse_integer()
    self.expect(u',')
    maximum = self.parse_integer()
    self.expect(u'}')
    if minimum > maximum:
        raise RegexParserExceptionInternal(
            "Minimum repetition (%d) cannot be larger than maximum repetition (%d)."
            % (minimum, maximum))
    return Regex.Repetition(child, minimum, maximum)
def classificarBuffer(self, buffer, numero_linha):
    """Classify a lexeme buffer, returning '<line> <CODE> <lexeme>'.

    Codes: PRE reserved word, NRO number, NMF malformed number,
    IDE identifier, IMF malformed identifier, CAD string literal,
    CMF malformed string, SIB unrecognized symbol. Any malformed
    classification also sets self.teve_erro.
    """
    self.buffer = ""
    regex = Regex.Regex()
    if buffer in self.palavrasReservadas:
        return f"{numero_linha} PRE {buffer}"
    if buffer[0].isdigit():
        # number validation
        if regex.identificadorNumero(buffer):
            return f"{numero_linha} NRO {buffer}"
        self.teve_erro = True
        return f"{numero_linha} NMF {buffer}"
    if buffer[0].isalpha():
        # identifier validation (an unreachable `pass` after the
        # returns was removed here)
        if regex.identificaIdentificador(buffer):
            return f"{numero_linha} IDE {buffer}"
        self.teve_erro = True
        return f"{numero_linha} IMF {buffer}"
    if buffer[0] == '"':
        # string-literal validation
        if regex.identificadorCadeiaCaracteres(buffer):
            return f"{numero_linha} CAD {buffer}"
        self.teve_erro = True
        return f"{numero_linha} CMF {buffer}"
    if buffer[0] == '-':
        # negative-number validation
        if regex.identificadorNumeroNegativo(buffer):
            return f"{numero_linha} NRO {buffer}"
        self.teve_erro = True
        return f"{numero_linha} NMF {buffer}"
    # leading symbol matched no category
    self.teve_erro = True
    return f"{numero_linha} SIB {buffer}"
def ttest_find_nextGroup(self): data = { 'a': 'a', 'ab': 'ab', 'abc': 'abc', 'a(': 'a', 'ab(': 'ab', 'abc(': 'abc', 'a|': 'a', 'ab|': 'ab', 'abc|': 'abc', 'a*': 'a', 'ab*': 'a', 'abc*': 'ab' } for k, v in data.items(): r = Regex.find_nextGroup(k, 0) print k, v, r, k[0:r] self.assertEqual(k[0:r], v)
def tests_of_my_friend(self):
    """Table-driven checks: each pattern maps candidate strings to the
    expected boolean outcome of matching against the compiled NFA."""
    testcases = {
        "abc": {"a": False, "abc": True},
        "ab|c": {"ab": True, "ac": False, "c": True, "abc": False},
        "ab*c": {"ac": True, "abc": True, "abbbbbbbc": True, "bc": False, "ab": False},
        "a(bb)*c": {"ac": True, "abc": False, "abbc": True, "abbbc": False, "abbbbc": True},
        "(ab|cx)*": {"": True, "ab": True, "abab": True, "cx": True, "cxcx": True, "abcx": True},
        "ca*|b": {"cb": False, "ca": True, "caaaaa": True, "c": True},
        "01*": {"01111": True, "011000": False},
        "(a|b*)*": {"ab": True, "abba": True},
        "(0|1)01": {"001": True, "101": True, "010": False}
    }
    for expr, tests in testcases.items():
        nfa = Regex.compile(expr)
        for string, expected_answer in tests.items():
            # bool(path) replaces the non-idiomatic `True if path else False`.
            self.assertEqual(expected_answer, bool(nfa.match(string)))
def test_complex_example_2_3(self):
    """'(0|11*0)*' must reject '11111' (every group iteration ends in 0)."""
    nfa = Regex.compile('(0|11*0)*')
    self.assertEqual(False, bool(nfa.match('11111')))
def test_klenee_star_match_2(self):
    """'0*1*' accepts '00011' (zeros then ones)."""
    matched = bool(Regex.compile('0*1*').match('00011'))
    self.assertEqual(True, matched)
def test_from_example_5(self):
    """'01*' accepts the bare '0' (zero repetitions of '1')."""
    nfa = Regex.compile('01*')
    self.assertEqual(True, bool(nfa.match('0')))
def test_simple_match(self):
    """A single-literal pattern matches that literal."""
    self.assertEqual(True, bool(Regex.compile('0').match('0')))
def test_from_example_2_2(self):
    """'(0|1)01' accepts '101'."""
    nfa = Regex.compile('(0|1)01')
    self.assertEqual(True, bool(nfa.match('101')))
# Output paths and configuration for the feature-extraction pipeline.
# NOTE(review): data_input, data_clean, result_tfidf, data_label, waktu,
# pr, tfidf and cr are defined earlier in the file (outside this view).
result_gabungan = './dummy/hasil_gabungan.csv'
path_model = './input/skipgram/ws_model100.bin'  # edit this line to switch the embedding model in use
path_hasil_tfidf = './dummy/Report_tfidf.txt'
path_hasil_word2vec = './dummy/Report_word2vec.txt'
path_hasil_gabungan = './dummy/Report_gabungan.txt'
metode_tfidf = 'TF-IDF'
metode_w2vec = 'WORD2VEC'
metode_gabungan = 'GABUNGAN'
# --- Preprocessing ---
print("=== Preprocessing ===")
pr.praproses_data(data_input, data_clean)
# --- Load word dictionary (unique words per document and overall) ---
print("=== Fitur Freq Perdoc & Alldoc ===")
fitur_onedoc, fitur_alldoc = Regex.load_fitur_postag(data_clean)
# ''' -- leftover block-comment marker; its partner is presumably outside this view
print("=== Bag Of Words ===")
bow = tfidf.bagofword(fitur_alldoc)
start_time1 = time.time()
# --- Feature extraction using TF-IDF, then cross-validated evaluation ---
print("=== NEW Feature Extraction TfIdf ===")
hasil_ekstraksi_tfidf, bow = tfidf.main(fitur_onedoc, fitur_alldoc, result_tfidf)
h_loss_tfidf = cr.cross_validation(result_tfidf, data_label, path_hasil_tfidf, metode_tfidf)
# Record the elapsed wall-clock time for the TF-IDF stage.
waktu.write("TF-IDF " + "--- %s seconds ---" % (time.time() - start_time1) + '\n')
start_time1 = time.time()
# --- Feature extraction using word2vec vectors ---
print("=== NEW Feature Extraction Vector ===")
import Regex
from IO import IO

if __name__ == '__main__':
    # Line 0 of input.txt is the pattern, line 1 the number of test
    # strings; for each following string, write the NFA state sequence
    # produced by match(), one line per string.
    io = IO()
    lines = io.read("input.txt")
    nfa = Regex.compile(lines[0])
    count = int(lines[1])
    result_lines = []
    for idx in range(count):
        states = nfa.match(lines[2 + idx])
        # Each state is rendered followed by a space, as before.
        result_lines.append("".join(str(state) + " " for state in states))
    # join() avoids the quadratic += build-up and the trailing-newline trim.
    io.write("output.txt", "\n".join(result_lines))
def test_from_example_3_3(self):
    """'00(0|1)*' rejects '1000' (input must begin with two zeros)."""
    nfa = Regex.compile('00(0|1)*')
    self.assertEqual(False, bool(nfa.match('1000')))
# Demo script: exercise the project's JSON.Update helper and the
# Regex.CheckADD command matcher.
import JSON
import Regex

# Distance table keyed by neighbour IP address.
distance = {
    "127.0.1.4": 10,
    "127.0.1.5": 0,
    "127.0.1.2": 10,
    "127.0.1.3": 10
}
mensagem = JSON.Update("abb", "abb", distance)
print(type(distance))
print(mensagem)
# CheckADD presumably validates an "add <ip> <metric>" command string —
# confirm against the Regex module.
if Regex.CheckADD("add 192.168.15.5 0"):
    print("YES! We have a match!")
else:
    print("No match")
def test_from_example_3_4(self):
    # NOTE(review): no assertion follows `result` — either this test is
    # truncated at the end of the visible file or the assertEqual was
    # accidentally dropped; every sibling test asserts on `result`.
    # Confirm and add the missing assertion.
    nfa = Regex.compile('00(0|1)*')
    path = nfa.match('0011')
    result = True if path else False