def get():
    """Detect the clang toolchain installed on this machine.

    Returns a populated Compiler when `clang -dumpversion` succeeds,
    otherwise an unavailable placeholder Compiler("clang").
    """
    if not Shell.check(["clang", "-dumpversion"]):
        # clang is not on PATH: report an unavailable compiler.
        return Compiler("clang")
    full_version = Regex.version(Shell.get(["clang", "-v"]))
    major_version = Regex.first_number(full_version)
    # macOS clang >= 10 has a two-digit major, so keep one extra char.
    digits = 4 if (System.is_mac and major_version > 9) else 3
    conan_version = full_version[:digits]
    conan_name = "apple-clang" if (System.is_mac and not Args.android) else "clang"
    return Compiler(name="clang",
                    is_available=True,
                    libcxx="libc++",
                    conan_name=conan_name,
                    full_version=full_version,
                    major_version=major_version,
                    conan_version=conan_version,
                    CC="clang",
                    CXX="clang++")
def get_literal(character, is_case_insensitive):
    """Build a Regex.Literal that matches a single character.

    When case-insensitive matching is requested (and not globally turned
    off via `suppress_case_insensitive` — presumably an enclosing-scope
    flag; confirm), a cased character matches either of its variants.
    """
    if is_case_insensitive and not suppress_case_insensitive:
        lower, upper = character.lower(), character.upper()
        if lower != upper:
            # Cased character: accept both the lower- and uppercase form.
            return Regex.Literal([(ord(lower), ord(lower)),
                                  (ord(upper), ord(upper))])
    code = ord(character)
    return Regex.Literal([(code, code)])
def _parse_opening_tag_match(self, opening_tag_match, text):
    # Given the regex match of an opening tag, scan forward through
    # `text` to locate the matching closing tag (honouring nesting of
    # the same tag name), build the element's content from everything
    # between them, and return (Element, index just past the closer).
    tag_closed_count = 1  # one unbalanced opening tag so far
    end_pos = opening_tag_match.end()
    tag_dict = opening_tag_match.groupdict()
    tag, attributes = tag_dict["tag"], tag_dict["attributes"]
    # Patterns specialised to this tag name.
    opening_tag_p = Regex.OPENING_TAG_P_FORMAT.format(tag=tag)
    closing_tag_p = Regex.CLOSING_TAG_P_FORMAT.format(tag=tag)
    while tag_closed_count != 0:
        opening_tag = re.compile(opening_tag_p).search(text, end_pos)
        closing_tag = re.compile(closing_tag_p).search(text, end_pos)
        # Whichever tag occurs first decides how nesting depth changes.
        nearest_match_tag = Regex.nearest_match(opening_tag, closing_tag)
        if nearest_match_tag is None:
            # Ran out of text before the tag was balanced.
            raise Exceptions.NoMatchingClosingTagError(
                tag_dict["opening_tag"])
        elif nearest_match_tag == opening_tag:
            tag_closed_count += 1  # nested opening of the same tag
        elif nearest_match_tag == closing_tag:
            tag_closed_count -= 1
        end_pos = nearest_match_tag.end()
    # Loop exits on a closing match, so `closing_tag` is the balancer.
    content = self._create_content(
        text[opening_tag_match.end():closing_tag.start()])
    element = Element.Element(tag, content, attributes)
    return element, end_pos
def ttest_find_rparen(self):
    # Disabled test (ttest_ prefix): Regex.find_rParen should return the
    # index of the parenthesis matching the one just before `start`.
    cases = [
        ("abc(hd)df", 4, 6),
        ("abc(h(d)d)f", 4, 9),
        ("(ab()ch(d)d)f", 1, 11),
        ("(ab(())()((()))ch(d)d)f", 1, 21),
    ]
    for pattern, start, expected in cases:
        self.assertEqual(Regex.find_rParen(pattern, start), expected)
def parse_character_class(self):
    """
    Parse a character class ([...]) expression from the string at its
    current index.

    @return: a Regex.Literal object representing the characters
    """
    characters = self.parse_character_class_expression()
    # list(...) copies the iterable directly; the previous
    # `[i for i in characters]` comprehension was a needless manual copy.
    return Regex.Literal(list(characters))
def ttest_find_rparen(self):
    """Disabled (ttest_ prefix): checks matching-right-paren lookup."""
    check = self.assertEqual
    check(Regex.find_rParen("abc(hd)df", 4), 6)
    check(Regex.find_rParen("abc(h(d)d)f", 4), 9)
    check(Regex.find_rParen("(ab()ch(d)d)f", 1), 11)
    check(Regex.find_rParen("(ab(())()((()))ch(d)d)f", 1), 21)
def home_handle(home_url, dic):
    """Fetch the blog index at `home_url` and parse each valid post URL into `dic`."""
    for url in BeautifulSoup.home_parse(home_url):
        if not Regex.url_judge(url):
            continue  # skip URLs that fail validation
        print("Begin handling:\t", url)
        str_html = splider_test.splider(url)
        BeautifulSoup.parse(str_html, dic)
def ttest_find_nextGroup(self): data = {'a':'a', 'ab':'ab', 'abc':'abc', 'a(':'a', 'ab(':'ab', 'abc(':'abc', 'a|':'a', 'ab|':'ab', 'abc|':'abc', 'a*':'a', 'ab*':'a', 'abc*':'ab'} for k, v in data.items(): r = Regex.find_nextGroup(k, 0) print k, v, r, k[0:r] self.assertEqual(k[0:r], v)
def home_parse(url):
    """Return the list of post URLs extracted from the blog index page at `url`."""
    str_html = splider_test.splider(url)
    soup = BeautifulSoup(str_html, 'lxml')
    headings = soup.find_all(attrs={'class': 'am-list-item-hd'})
    # url_match pulls the link out of each heading's markup.
    return [Regex.url_match(str(item)) for item in headings]
def parse_variable(self):
    """
    Parse a variable instance from the string at its current index.

    @return: a Regex.Variable object representing the variable instance
    """
    name_chars = []
    # Variable names consist of letters and '.'; stop at anything else.
    while self.next_is(string.ascii_letters + '.'):
        name_chars.append(self.get_next())
    self.expect("}")
    return Regex.Variable(''.join(name_chars))
def parse_qualified(self):
    """
    Parse the string from its current index into a repetition or an
    expression that can be contained by a repetition.

    @return: a visitable regular expression object from the Regex package.
    """
    child = self.parse_character()
    if self.get_next_if(u'*'):
        return Regex.Repetition(child, 0, Regex.Repetition.Infinity)
    if self.get_next_if(u'+'):
        return Regex.Repetition(child, 1, Regex.Repetition.Infinity)
    if self.get_next_if(u'?'):
        return Regex.Repetition(child, 0, 1)
    # A '{' may begin either {min,max} or a {variable}; peek one extra
    # character to rule out a variable name before consuming the brace.
    if self.next_is(u'{') and self.nth_next_is_not(
            2, string.ascii_letters + '.'):
        self.get_next()
        return self.parse_repetition(child)
    return child
def classificarBufferComentarioBloco(self, numero_linha, buffer):
    """Validate a block-comment buffer; return an error record or ''.

    On a malformed block comment, sets self.teve_erro and returns a
    '<line> CoMF <buffer>' record; otherwise returns the empty string.
    """
    self.buffer_comentario_bloco = ""
    resultado = Regex.Regex().identificaComentarioBloco(buffer)
    if resultado:
        return ""
    # Malformed block comment: flag the error and report it.
    self.teve_erro = True
    return f"{numero_linha} CoMF {buffer}"
def __init__(self):
    # Initialise per-run interpreter state. Attribute names are
    # Portuguese (instrucaoAtual = current instruction, pilhaBloco =
    # block stack, contInteracoes = iteration counter).
    self.regex = Regex.Regex()        # token/lexeme validation helpers
    self.outLine = Output.Output()    # output formatting helper
    self.instrucaoAtual = None
    self.paramBloco = []
    self.blocosCod = []
    self.estadoAtual = ''
    self.listaDePrints = []
    self.contInteracoes = 0
    self.blocoAtual = None
    self.fita = Ribbon.Ribbon()       # Ribbon ~ the machine's tape
    self.pilhaBloco = []
def parse_concatenation(self):
    """
    Parse the string from its current index into a concatenation or an
    expression that can be contained by a concatenation.

    @return: a visitable regular expression object from the Regex package.
    """
    parts = [self.parse_qualified()]
    # Keep consuming qualified terms until an alternation or group boundary.
    while self.next_is_not(u'|)'):
        parts.append(self.parse_qualified())
    # A single term needs no Concatenation wrapper.
    return Regex.Concatenation(parts) if len(parts) > 1 else parts[0]
def parse_alternation(self):
    """
    Parse the string from its current index into an alternation or an
    expression that can be contained by an alternation.

    @return: a visitable regular expression object from the Regex package.
    """
    branches = [self.parse_concatenation()]
    while self.get_next_if(u'|'):
        branches.append(self.parse_concatenation())
    if len(branches) == 1:
        # A single branch needs no Alternation wrapper.
        return branches[0]
    return Regex.Alternation(branches)
def doRegex():
    """Request handler: flag fetched tweets against a spam-indicator regex.

    Reads a JSON payload from the 'hasil' form field; fetches tweets by
    username (search_type "1") or by region, tests each entry of
    tweets['full_text'] against the pattern, stores the booleans under
    'is_spam', and returns the whole structure as a JSON string.
    """
    # json.loads is the idiomatic spelling of json.JSONDecoder().decode;
    # the old `tweets = dict()` placeholder was immediately overwritten.
    data = json.loads(request.form['hasil'])
    pattern = Regex(data['spam_indicator'])
    count = data['count']
    if data['search_type'] == "1":
        tweets = api.search_timeline(data['username'], count)
    else:
        tweets = api.search_region(data['region'], count)
    tweets['is_spam'] = [pattern.is_match(tweet) for tweet in tweets['full_text']]
    return json.dumps(tweets)
def get():
    """Detect an installed, supported gcc toolchain.

    Prefers the default `gcc` on PATH when its major version is
    supported; otherwise probes the versioned binaries gcc-7..gcc-10.
    Returns an unavailable Compiler("gcc") placeholder when nothing
    usable is found.
    """
    if not Shell.check(["gcc", "-dumpversion"]):
        return Compiler("gcc")
    supported_versions = [7, 8, 9, 10]
    default_version = Shell.get(["gcc", "-dumpversion"])
    default_major_version = Regex.first_number(default_version)
    if default_major_version in supported_versions:
        return Compiler(name="gcc",
                        is_available=True,
                        full_version=default_version,
                        major_version=default_major_version,
                        conan_version=default_version[:3],
                        # fix: CC was omitted in this branch only, unlike
                        # the versioned branch below and the clang probe.
                        CC="gcc",
                        CXX="g++")
    # Default gcc unsupported: probe explicitly versioned binaries.
    version = next((v for v in supported_versions
                    if Shell.check(["gcc-" + str(v), "-dumpversion"])), None)
    if not version:
        return Compiler("gcc")
    full_version = Shell.get(["gcc-" + str(version), "-dumpversion"])
    major_version = Regex.first_number(full_version)
    return Compiler(name="gcc",
                    is_available=True,
                    full_version=full_version,
                    # fix: reuse major_version instead of re-running
                    # Regex.first_number on the same string.
                    major_version=major_version,
                    conan_version=full_version[:3],
                    CC="gcc-" + str(version),
                    CXX="g++-" + str(version))
def test_accept(self): # pattern='(a|b)*abb' # ls=['abb','ababb','vabb','abbabababb','abba'] # pattern='(a|b)*(aa|bb)(a|b)* ' # ls=['abb','ababb','aabb','ababa','aababa'] # pattern = '(1*01*0)*1*' # ls = ['00', '', '10111', '10001', '100100', '0101010', '1010101'] # pattern = '(a|b)*aa|bb' # ls = ['bb', '', 'a', 'aa', 'aaa', 'aaaa', 'b', 'abb', 'bbb', 'bbbb', 'ababa', 'abaab'] pattern = 'abc*' ls = ['ab', '', 'aa', 'abc', 'abcc', 'abccc'] for i in ls: print i, Regex.accept(pattern, i)
def test_accept(self): # pattern='(a|b)*abb' # ls=['abb','ababb','vabb','abbabababb','abba'] # pattern='(a|b)*(aa|bb)(a|b)* ' # ls=['abb','ababb','aabb','ababa','aababa'] # pattern = '(1*01*0)*1*' # ls = ['00', '', '10111', '10001', '100100', '0101010', '1010101'] # pattern = '(a|b)*aa|bb' # ls = ['bb', '', 'a', 'aa', 'aaa', 'aaaa', 'b', 'abb', 'bbb', 'bbbb', 'ababa', 'abaab'] pattern = 'abc*' ls = ['ab', '', 'aa', 'abc', 'abcc', 'abccc'] for i in ls: print i,Regex.accept(pattern, i)
def __init__(self, rules, strict=False, ignore=None,
             charset=frozenset(map(chr, range(128)))):
    """Build a lexer by merging the DFA of every rule.

    @param rules:   rule objects exposing .value and .isRegex
    @param strict:  stored as-is for use elsewhere
    @param ignore:  tokens to ignore; defaults to a fresh empty list.
                    (Fix: the old `ignore=[]` was a shared mutable
                    default argument.)
    @param charset: the alphabet, default 7-bit ASCII (frozenset is
                    immutable, so a shared default is safe here)
    """
    self.rules = rules
    self.strict = strict
    self.ignore = [] if ignore is None else ignore
    self.charset = charset
    self.ldfa = None
    # NOTE(review): assumes `rules` is non-empty; with no rules,
    # self.ldfa stays None and minimize() below would fail (unchanged
    # from the original behavior).
    for rule in self.rules:
        # Literal rules are escaped character-by-character so the regex
        # engine treats their value verbatim.
        value = rule.value if rule.isRegex else "\\" + "\\".join(rule.value)
        ldfa = Regex.regexToLDFA(value, self.charset)
        if self.ldfa is None:
            self.ldfa = ldfa
        else:
            self.ldfa.merge(ldfa)
    self.ldfa.minimize()
def parse_repetition(self, child):
    """
    Parse a {min, max} expression from the string at its current index.

    @param child: a visitable object from the Regex package containing
                  the repeated expression.
    @return: a Regex.Repetition object representing the repetition.
    @raise RegexParserExceptionInternal: if min exceeds max.
    """
    minimum = self.parse_integer()
    self.expect(u',')
    maximum = self.parse_integer()
    self.expect(u'}')
    if minimum > maximum:
        raise RegexParserExceptionInternal(
            "Minimum repetition (%d) cannot be larger than maximum repetition (%d)."
            % (minimum, maximum))
    return Regex.Repetition(child, minimum, maximum)
def classificarBuffer(self, buffer, numero_linha):
    """Classify a lexeme buffer, returning '<line> <CODE> <lexeme>'.

    Codes: PRE reserved word, NRO number, NMF malformed number,
    IDE identifier, IMF malformed identifier, CAD string literal,
    CMF malformed string, SIB unrecognized symbol. Any malformed
    classification also sets self.teve_erro.
    """
    self.buffer = ""
    regex = Regex.Regex()
    if buffer in self.palavrasReservadas:
        return f"{numero_linha} PRE {buffer}"
    if buffer[0].isdigit():
        # number validation
        if regex.identificadorNumero(buffer):
            return f"{numero_linha} NRO {buffer}"
        self.teve_erro = True
        return f"{numero_linha} NMF {buffer}"
    if buffer[0].isalpha():
        # identifier validation (an unreachable `pass` after the
        # returns was removed here)
        if regex.identificaIdentificador(buffer):
            return f"{numero_linha} IDE {buffer}"
        self.teve_erro = True
        return f"{numero_linha} IMF {buffer}"
    if buffer[0] == '"':
        # string-literal validation
        if regex.identificadorCadeiaCaracteres(buffer):
            return f"{numero_linha} CAD {buffer}"
        self.teve_erro = True
        return f"{numero_linha} CMF {buffer}"
    if buffer[0] == '-':
        # negative-number validation
        if regex.identificadorNumeroNegativo(buffer):
            return f"{numero_linha} NRO {buffer}"
        self.teve_erro = True
        return f"{numero_linha} NMF {buffer}"
    # leading symbol matched no category
    self.teve_erro = True
    return f"{numero_linha} SIB {buffer}"
def ttest_find_nextGroup(self): data = { 'a': 'a', 'ab': 'ab', 'abc': 'abc', 'a(': 'a', 'ab(': 'ab', 'abc(': 'abc', 'a|': 'a', 'ab|': 'ab', 'abc|': 'abc', 'a*': 'a', 'ab*': 'a', 'abc*': 'ab' } for k, v in data.items(): r = Regex.find_nextGroup(k, 0) print k, v, r, k[0:r] self.assertEqual(k[0:r], v)
def tests_of_my_friend(self):
    """Table-driven checks: each pattern maps candidate strings to the
    expected boolean outcome of matching against the compiled NFA."""
    testcases = {
        "abc": {"a": False, "abc": True},
        "ab|c": {"ab": True, "ac": False, "c": True, "abc": False},
        "ab*c": {"ac": True, "abc": True, "abbbbbbbc": True, "bc": False, "ab": False},
        "a(bb)*c": {"ac": True, "abc": False, "abbc": True, "abbbc": False, "abbbbc": True},
        "(ab|cx)*": {"": True, "ab": True, "abab": True, "cx": True, "cxcx": True, "abcx": True},
        "ca*|b": {"cb": False, "ca": True, "caaaaa": True, "c": True},
        "01*": {"01111": True, "011000": False},
        "(a|b*)*": {"ab": True, "abba": True},
        "(0|1)01": {"001": True, "101": True, "010": False}
    }
    for expr, tests in testcases.items():
        nfa = Regex.compile(expr)
        for string, expected_answer in tests.items():
            # bool(path) replaces the non-idiomatic `True if path else False`.
            self.assertEqual(expected_answer, bool(nfa.match(string)))
def test_complex_example_2_3(self):
    """'(0|11*0)*' must reject '11111' (every group iteration ends in 0)."""
    nfa = Regex.compile('(0|11*0)*')
    self.assertEqual(False, bool(nfa.match('11111')))
def test_klenee_star_match_2(self):
    """'0*1*' accepts '00011' (zeros then ones)."""
    matched = bool(Regex.compile('0*1*').match('00011'))
    self.assertEqual(True, matched)
def test_from_example_5(self):
    """'01*' accepts the bare '0' (zero repetitions of '1')."""
    nfa = Regex.compile('01*')
    self.assertEqual(True, bool(nfa.match('0')))
def test_simple_match(self):
    """A single-literal pattern matches that literal."""
    self.assertEqual(True, bool(Regex.compile('0').match('0')))
def test_from_example_2_2(self):
    """'(0|1)01' accepts '101'."""
    nfa = Regex.compile('(0|1)01')
    self.assertEqual(True, bool(nfa.match('101')))
# Output paths and configuration for the feature-extraction pipeline.
# NOTE(review): data_input, data_clean, result_tfidf, data_label, waktu,
# pr, tfidf and cr are defined earlier in the file (outside this view).
result_gabungan = './dummy/hasil_gabungan.csv'
path_model = './input/skipgram/ws_model100.bin'  # edit this line to switch the embedding model in use
path_hasil_tfidf = './dummy/Report_tfidf.txt'
path_hasil_word2vec = './dummy/Report_word2vec.txt'
path_hasil_gabungan = './dummy/Report_gabungan.txt'
metode_tfidf = 'TF-IDF'
metode_w2vec = 'WORD2VEC'
metode_gabungan = 'GABUNGAN'
# --- Preprocessing ---
print("=== Preprocessing ===")
pr.praproses_data(data_input, data_clean)
# --- Load word dictionary (unique words per document and overall) ---
print("=== Fitur Freq Perdoc & Alldoc ===")
fitur_onedoc, fitur_alldoc = Regex.load_fitur_postag(data_clean)
# ''' -- leftover block-comment marker; its partner is presumably outside this view
print("=== Bag Of Words ===")
bow = tfidf.bagofword(fitur_alldoc)
start_time1 = time.time()
# --- Feature extraction using TF-IDF, then cross-validated evaluation ---
print("=== NEW Feature Extraction TfIdf ===")
hasil_ekstraksi_tfidf, bow = tfidf.main(fitur_onedoc, fitur_alldoc, result_tfidf)
h_loss_tfidf = cr.cross_validation(result_tfidf, data_label, path_hasil_tfidf, metode_tfidf)
# Record the elapsed wall-clock time for the TF-IDF stage.
waktu.write("TF-IDF " + "--- %s seconds ---" % (time.time() - start_time1) + '\n')
start_time1 = time.time()
# --- Feature extraction using word2vec vectors ---
print("=== NEW Feature Extraction Vector ===")
import Regex
from IO import IO

if __name__ == '__main__':
    # Line 0 of input.txt is the pattern, line 1 the number of test
    # strings; for each following string, write the NFA state sequence
    # produced by match(), one line per string.
    io = IO()
    lines = io.read("input.txt")
    nfa = Regex.compile(lines[0])
    count = int(lines[1])
    result_lines = []
    for idx in range(count):
        states = nfa.match(lines[2 + idx])
        # Each state is rendered followed by a space, as before.
        result_lines.append("".join(str(state) + " " for state in states))
    # join() avoids the quadratic += build-up and the trailing-newline trim.
    io.write("output.txt", "\n".join(result_lines))
def test_from_example_3_3(self):
    """'00(0|1)*' rejects '1000' (input must begin with two zeros)."""
    nfa = Regex.compile('00(0|1)*')
    self.assertEqual(False, bool(nfa.match('1000')))
# Demo script: exercise the project's JSON.Update helper and the
# Regex.CheckADD command matcher.
import JSON
import Regex

# Distance table keyed by neighbour IP address.
distance = {
    "127.0.1.4": 10,
    "127.0.1.5": 0,
    "127.0.1.2": 10,
    "127.0.1.3": 10
}
mensagem = JSON.Update("abb", "abb", distance)
print(type(distance))
print(mensagem)
# CheckADD presumably validates an "add <ip> <metric>" command string —
# confirm against the Regex module.
if Regex.CheckADD("add 192.168.15.5 0"):
    print("YES! We have a match!")
else:
    print("No match")
def test_from_example_3_4(self):
    # NOTE(review): no assertion follows `result` — either this test is
    # truncated at the end of the visible file or the assertEqual was
    # accidentally dropped; every sibling test asserts on `result`.
    # Confirm and add the missing assertion.
    nfa = Regex.compile('00(0|1)*')
    path = nfa.match('0011')
    result = True if path else False