def sub_mytag(self, re):
    """Replace a tag name with the value of the attribute it names.

    ``re`` is a regex match object (the parameter name shadows the ``re``
    module inside this function). Group 1 holds the expression to resolve.
    It is evaluated as an attribute of ``self`` first; if that fails for any
    reason it is evaluated as a free-standing expression.

    NOTE(security): ``eval`` executes arbitrary code. This is only safe when
    the text being matched comes from a trusted source.

    Returns:
        Whatever the evaluated expression yields.

    Raises:
        Any exception raised by the fallback evaluation.
    """
    expression = re.group(1)
    try:
        return eval('self.' + expression)
    except Exception:
        # Not resolvable on the instance: fall back to the bare expression.
        # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
        return eval(expression)
def _privmsg(self, msg): msgs = msg.split() fullnick = msgs[0][1:] nick = fullnick.split("!")[0] #ident = fullnick.split("!")[1].split("@")[0] #hostname = fullnick.split("@")[1] dest = msgs[2] content = ' '.join(msgs[3:]) if dest.lower() == self.adminchan.lower(): tools.debug(msgs[3][1:]) cmd = { '%join': self._cmdJoin, '%part':self._cmdPart, '%quit':self._cmdQuit, '%say':self._cmdSay, '%raw':self._cmdRaw } try: cmd.get(msgs[3][1:])(msg) except: #self.send("PRIVMSG {0} :Message reçu de {1} pour {2} > {3}".format(adminchan, nick, dest, ' '.join(msgs[3:])[1:])) pass #else: # self.send("PRIVMSG {0} :Message reçu de {1} pour {2} > {3}".format(self.adminchan, nick, dest, ' '.join(msgs[3:])[1:])) if dest[0] == '#': re=ytre.search(content) if re: print("Find YouTube video {}".format(re.group('videoid'))) stats=youtube.ytVideoStats(re.group('videoid')) self.send("PRIVMSG {} :\002\00301,00You\00300,04Tube\017\002 {} \002{} | Durée : {} | Vues : {} | J'aime : {} | Je n'aime pas : {}".format(dest, stats['user'], stats['title'], str(datetime.timedelta(seconds=stats['duration'])), stats['view'], stats['like'], stats['dislike'])) else: pass
def load_latest_cfg(dir_path, target=__C):
    """Load the highest-numbered ``cfg<N>.yml`` found in ``dir_path``.

    Every entry of ``dir_path`` is searched for ``cfg(\\d+)\\.yml``; the entry
    with the largest number wins. The path handed to ``cfg_from_file`` is
    built from the *matched text* (e.g. ``cfg12.yml``), not from the full
    directory entry name.

    Args:
        dir_path: Directory to scan.
        target: Config object forwarded to ``cfg_from_file``.

    If nothing matches, ``cfg_from_file`` is still called, with
    ``file_name=None``.
    """
    import re

    cfg_pattern = re.compile(r'cfg(\d+)\.yml')
    cfg_count = None
    source_cfg_path = None
    for fname in os.listdir(dir_path):
        ret = cfg_pattern.search(fname)
        if ret is None:
            continue
        # Read the number from the match object; the ``re`` module itself
        # has no ``group`` function.
        count = int(ret.group(1))
        if cfg_count is None or count > cfg_count:
            cfg_count = count
            source_cfg_path = os.path.join(dir_path, ret.group(0))
    cfg_from_file(file_name=source_cfg_path, target=target)
def check_none(re):
    """Return group 1 of a regex match, or None when there is no match.

    Lets callers pass the result of a search straight through without
    raising when nothing was recognised.
    """
    return re.group(1) if re else None
def corpus_files_iter(root_path):
    """Yield ``(group, path)`` for every globbed file that ``lang_regex`` matches.

    Files are found with ``glob_pattern`` joined under ``root_path``; each
    path is normalised before being searched. The first tuple element is
    group 1 of the ``lang_regex`` match (presumably a language code - confirm
    against the pattern definition). Paths that do not match are skipped.
    """
    pattern = os.path.join(root_path, glob_pattern)
    for raw_path in glob.iglob(pattern):
        normalized = os.path.normpath(raw_path)
        match = lang_regex.search(normalized)
        if match:
            yield match.group(1), normalized
def email(self, value, errorMessage=''):
    """Validate that ``value`` looks like an e-mail address.

    Args:
        value: The text to validate.
        errorMessage: When non-empty, replaces ``self.emailError`` before
            validating.

    Returns:
        ``True`` when the whole of ``value`` matches the address pattern,
        otherwise ``self.emailError``.
    """
    if errorMessage:
        self.emailError = errorMessage
    # Raw string: in a normal literal "\b" is a backspace character, not a
    # word boundary. The character classes are upper-case only, so the match
    # must be case-insensitive.
    pattern = r"[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}"
    if not value or re.fullmatch(pattern, value, re.IGNORECASE) is None:
        return self.emailError
    return True
def setVersion(self, version):
    """Sets the version of the reporter to version.

    Recognizes and parses CVS revision strings: when ``version`` contains
    ``Revision: <text> `` the captured text is stored, otherwise ``version``
    is stored unchanged. ``None`` leaves the current version untouched.
    """
    if version is None:
        return
    found = re.search('Revision: (.*) ', version)
    # The captured text lives on the match object, not on the ``re`` module.
    self.version = found.group(1) if found else version
def pinger(self):
    """Drain pending ping output, record the latest result and re-arm.

    Lines are read from ``self.ping.stdout`` until ``readline`` raises
    ``IOError`` or returns an empty string (end of stream). For every line
    matched by ``self.PING_RE``, ``self.data["ping"]`` becomes
    ``(count + 1, float(group 1))`` and ``self.update()`` is called.
    Afterwards the method schedules itself again via ``self.after(100, ...)``.

    NOTE(review): presumably the pipe is non-blocking so that ``readline``
    raises ``IOError`` when no data is waiting - confirm where the process
    is started.
    """
    while True:
        try:
            line = self.ping.stdout.readline()
            if not line:
                # End of stream: without this the loop would spin forever.
                break
            match = self.PING_RE.search(line)
            if match:
                # ``in`` replaces dict.has_key(), which Python 3 removed.
                count = self.data["ping"][0] if "ping" in self.data else 0
                self.data["ping"] = (count + 1, float(match.group(1)))
                self.update()
        except IOError:
            break
    self.after(100, self.pinger)
def pinger(self):
    """Drain pending ping output, record the latest result and re-arm.

    Lines are read from ``self.ping.stdout`` until ``readline`` raises
    ``IOError`` or returns an empty string (end of stream). For every line
    matched by ``self.PING_RE``, ``self.data["ping"]`` becomes
    ``(count + 1, float(group 1))`` and ``self.update()`` is called.
    Afterwards the method schedules itself again via ``self.after(100, ...)``.

    NOTE(review): presumably the pipe is non-blocking so that ``readline``
    raises ``IOError`` when no data is waiting - confirm where the process
    is started.
    """
    while True:
        try:
            line = self.ping.stdout.readline()
            if not line:
                # End of stream: without this the loop would spin forever.
                break
            match = self.PING_RE.search(line)
            if match:
                # ``in`` replaces dict.has_key(), which Python 3 removed.
                count = self.data["ping"][0] if "ping" in self.data else 0
                self.data["ping"] = (count + 1, float(match.group(1)))
                self.update()
        except IOError:
            break
    self.after(100, self.pinger)
def get_file_name(self):
    """Derive the ``.flv`` file name from the page HTML.

    Downloads the page first (``self.download_html()``) when ``self.html``
    is still ``None``. The name is the part of ``<title ...>`` before the
    first `` - ``; if the title does not match, the second ``<h1>`` is used,
    or the first one when there is only one.

    Raises:
        IndexError: if neither the title nor any ``<h1>`` yields a name.
    """
    if self.html is None:
        self.download_html()
    match = re.search(r'<title[^>]+>([^<]+) - ', self.html)
    if match:
        # The captured text lives on the match object, not on ``re``.
        name = match.group(1)
    else:
        matches = re.findall('<h1>(.*?)</h1>', self.html)
        name = matches[1] if len(matches) > 1 else matches[0]
    return name + '.flv'
def readSource(self, source, pos, controller):
    """Extract the declaration section of ``source`` starting at ``pos``.

    Searches ``source`` from ``pos`` with ``self.endDeclRE`` and returns the
    position at which the caller should continue.

    The regex object is used statefully: ``search`` returns a position
    (``-1`` is treated as "not found") and ``group`` / ``regs`` are then read
    from the same object. This is not the modern ``re`` match-object API.
    """
    re = self.endDeclRE
    commentStart = re.search(source, pos)
    if commentStart == -1:
        # No end marker at all: record a placeholder, warn, do not advance.
        self.source = 'No declarations found!'
        controller.warning('missing end of declaration')
        return pos
    content = re.group(1)
    if string.find(string.lower(content), 'enddecl') != -1:
        # Marker text mentions "enddecl" (case-insensitive): everything
        # before it is the declaration source.
        self.source = source[pos:commentStart]
        # presumably the end offset of the whole match - TODO confirm
        nextPos = re.regs[0][1]
    else:
        # NOTE(review): this assigns ``self.pos`` while the not-found branch
        # above assigns ``self.source`` - looks like a typo; confirm.
        self.pos = 'No declarations found!'
        nextPos = commentStart
    return nextPos
def parseFunctionDeclaration(self, text):
    """Parse a function declaration into ``(name, [parameter names])``.

    Tries ``self.funcDclRE``, ``self.operatorDclRE`` and
    ``self.operatorNewDclRE`` in that order and uses the first one whose
    ``search`` does not return ``-1``. Returns ``None`` when none matches.

    The regex objects are used statefully: ``group`` is read from the same
    object after ``search``. This is not the modern ``re`` match-object API.
    """
    for re in [self.funcDclRE, self.operatorDclRE, self.operatorNewDclRE]:
        matchPos = re.search(text)
        if matchPos != -1:
            break
    else:
        # for/else: reached only when no pattern matched.
        return None
    funcName, paramString = re.group(1, 2)
    paramNames = []
    if string.strip(paramString) != '':
        paramStrings = string.split(paramString, ',')
        # NOTE(review): the loop variable shadows the builtin ``str``.
        for str in paramStrings:
            str = string.strip(str)
            if self.paramRE.search(str) != -1:
                paramNames.append(self.paramRE.group(1))
            else:
                # Keep one entry per parameter even when it cannot be parsed.
                paramNames.append('parse error')
    return funcName, paramNames
repl:指定的用来替换的字符串 string:目标字符串 count:最多替换次数 flags:同re简介 功能:在目标字符串中以正则表达式的规则匹配字符串,再把他们替换成指定的字符串。可以指定替换的次数,如果不指定,替换所有的匹配字符串 区别:前者返回结果字符串,后者返回一个tuple,第一个元素是结果字符串,第二个元素是表示被替换的次数 '''
print("字符串的替换与修改\n")
str3 = "xcs is a good good good man!"
print(re.sub(r"(good)", "nice", str3))
print(re.subn(r"(good)", "nice", str3, count=2))
print(type(re.sub(r"(good)", "nice", str3)))  # str
print(type(re.subn(r"(good)", "nice", str3)))  # tuple

# Grouping
''' re.group() 概念:除了简单的判断是否匹配之外,正则表达式还有提取子串的功能,用()表示分组 '''
str2 = r"0739-8881510"
m = re.match(r"(?P<first>\d{4})-(\d{7})", str2)
print(m)
# Inspect the matched groups: group() is called on the match object ``m``
# and a group is selected by index (or by name for named groups).
print(m.group())
print(m.group(1))
print(m.group("first"))
print(m.group(2))
print(m.groups())
def _check_phone_allowed_characters(self): for record in self: phone_disallowed_chars = "[^\d\-]" if record.phone: res = re.search(phone_disallowed_chars, record.phone) if res: raise ValidationError("The phone number contains an invalid character: "+ re.group(0)) if record.mobile: res = re.search(phone_disallowed_chars, record.mobile) if res: raise ValidationError("The mobile phone number contains an invalid character: " + res.group(0))
import re

# Print the first "python"/"perl" occurrence found on each line of the file.
# The matched text lives on the match object returned by re.search(); the
# ``re`` module itself has no ``group`` function.
with open("techinfo.txt") as fh:
    for line in fh:
        match = re.search("python|perl", line)
        if match:
            print(match.group())
# Plain (colon-free) keys that are stored under a different name.
_PLAIN_KEY_RENAMES = {
    'type': 'k_type',                    # [Req.9]
    'comment.it:2': 'comment:it:2',      # [Req.10]
    'step.condition': 'step_condition',  # [Req.11]
}


def shape_element(element):
    """Turn an OSM ``node``/``way`` XML element into a plain dict.

    Returns ``None`` for any other top-level tag [Req.1]. Otherwise:

    * ``type`` holds the tag name [Req.2].
    * ``lat``/``lon`` become floats in ``pos == [lat, lon]`` [Req.3].
    * Attributes listed in ``CREATED`` are grouped under ``created`` [Req.4];
      all other attributes are copied as-is [Req.5].
    * ``<tag k=... v=...>`` children: keys matching ``problemchars`` [Req.6]
      or listed in ``TAGDROP`` [Req.7] are dropped. Keys matching
      ``lower_colon`` are split into groups 1 and 3; ``addr:x`` goes into the
      ``address`` dict [Req.8] with the street/postcode/city fixes applied,
      other ``a:b`` keys are stored as ``a__b``, and keys whose second part
      contains another colon are ignored. Remaining keys are stored directly,
      with a few renames [Req.9-11].
    * ``<nd ref=...>`` children are collected in ``node_refs`` [Req.12].
    """
    if element.tag not in ('node', 'way'):
        return None

    node = {'type': element.tag}

    for attrib, attrib_value in element.attrib.items():
        if attrib == 'lat':
            node.setdefault('pos', [None, None])[0] = float(attrib_value)
        elif attrib == 'lon':
            node.setdefault('pos', [None, None])[1] = float(attrib_value)
        elif attrib in CREATED:
            node.setdefault('created', {})[attrib] = attrib_value
        else:
            node[attrib] = attrib_value

    for tag in element.iter('tag'):
        if 'k' not in tag.attrib:
            continue
        key = tag.attrib['k']
        if problemchars.match(key) or key in TAGDROP:
            continue

        colon_match = lower_colon.match(key)
        if not colon_match:
            node[_PLAIN_KEY_RENAMES.get(key, key)] = tag.attrib['v']
            continue

        first_key = colon_match.group(1)
        second_key = colon_match.group(3)
        # A second ':' separates the type/direction of a street: ignore.
        if lower_colon.match(second_key):
            continue

        value = tag.attrib['v']
        if first_key != 'addr':
            # Replace ':' with '__' for non-address namespaced keys.
            node[first_key + '__' + second_key] = value
            continue

        address = node.setdefault('address', {})
        if second_key == 'street':
            # [Req.17] Street fix. This is a branch of the chain below so the
            # generic fallback can no longer overwrite the mapped name with
            # the raw value (which is what the separate ``if`` used to do).
            address['street'] = STREETMAPPING[value] if value in STREETMAPPING else value
        elif second_key == 'postcode' and value == '20100':
            # [Req.13] The generic 20100 postcode is dropped.
            continue
        elif second_key == 'postcode' and value == '2090':
            # [Req.13] Truncated postcode.
            address[second_key] = '20090'
        elif second_key == 'city' and value in ('milano', 'MI'):
            # [Req.14] / [Req.15] Normalise the city name.
            address[second_key] = 'Milano'
        else:
            address[second_key] = value

    for nd in element.iter('nd'):
        refs = node.setdefault('node_refs', [])
        if 'ref' in nd.attrib:
            refs.append(nd.attrib['ref'])

    return node
for segment in segments: print segment['file'] segmenthandle = open(segment['file'], "r", -1) while 1: buffer = segmenthandle.read() if len(buffer) == 0: break tshandle.write(buffer) segmenthandle.close() tshandle.close() try: #mkvmerge v5.8.0 ('No Sleep / Pillow') built on Sep 11 2012 21:46:00 mkvmerge = subprocess.check_output(["mkvmerge", "-V"], stderr=subprocess.STDOUT) mkversion = re.search("(mkvmerge v(\d+)\.(\d+).(\d+) [^\n]+)", mkvmerge) if mkversion.group(2) < 5 or mkversion.group(3) < 8: print "mkvmerge >= 5.8.0 required (you have %s)" % re.group(1) sys.exit(1) except CalledProcessError as e: print e.output sys.exit(1) mkv = "%s/final.mkv" % options.scratch if not os.path.isfile(mkv): try: print "remuxing from MPEG-TS -> MKV" evideo = subprocess.check_output(["mkvmerge", "-o", mkv, tsfile], stderr=subprocess.STDOUT) except CalledProcessError as e: print e.output
process.maxEvents = cms.untracked.PSet( input = cms.untracked.int32(-1) )

# Define the input source
files = []
for f in args.files:
    # Local files are taken verbatim.
    if re.match('file:', f):
        files.append(f)
        continue
    # root://<host>/<path>/ : each path component is treated as a regular
    # expression and expanded against the remote directory listing.
    m = re.match('root://([^/]+)/(.*)/', f)
    if m:
        host = m.group(1)
        path = [ '' ]
        # The path comes from the match object ``m``; the ``re`` module
        # itself has no ``group`` function.
        for directory in m.group(2).split('/'):
            if directory == '':
                continue
            # Was ``d + '$'``: ``d`` is not defined anywhere in this loop.
            rd = re.compile(directory + '$')
            newpath = []
            for p in path:
                status, dirlist = XRootD.client.FileSystem(host).dirlist(p)
                for x in dirlist:
                    if rd.match(x):
                        newpath.append(x)
            if len(newpath) == 0:
                # ``'...' % a % b % c`` raises TypeError; the values must be
                # passed as a single tuple.
                if len(path) == 1:
                    raise Exception('Path root://%s/%s/%s does not exist' % (host, path[0], directory))
                else:
                    msg = 'None of the pathes\n'
                    for p in path:
                        msg += 'root://%s/%s/%s\n' % (host, p, directory)
# Python regular expression grabbing a middle number
import re

m = re.search(r"-?\d+\.?\d+\s+(-?\d+\.?\d+)\s+-?\d+\.?\d+", "53.4 -63.2 433.2")
# The captured group is read from the match object, not from the module.
print(m.group(1))
def hiduke2yyyymmdd_sub(datestr):
    """Convert a date string into a ``(from, to)`` pair of ``YYYYMMDD`` strings.

    Accepts Japanese-era dates (the first two characters are looked up in
    ``DateHelper.GENGOUS``) and Gregorian dates starting with four digits,
    each with day, month or year precision. Month/year precision yields the
    first and last day of that period; an era name alone yields the era's
    ``from``/``to`` bounds.

    Returns:
        ``(yyyymmdd_from, yyyymmdd_to)``; ``(None, None)`` for ``None``,
        blank input, or an unrecognised format.
    """
    if datestr is None or datestr.strip() == "":
        return None, None

    dfrom = None
    dto = None

    datestr = datestr.strip()
    # Remove "uncertain" markers.
    # NOTE(review): this removes only the literal 8-character sequence, not
    # each character individually - confirm the intended behaviour.
    datestr = datestr.replace("[]??()()", "")
    # Remove spaces (the original comment mentions half- and full-width).
    datestr = datestr.replace(" ", "")
    # Convert full-width digits to ASCII digits.
    # NOTE(review): both tables are ASCII here, so this is a no-op; the
    # full-width characters may have been lost in an encoding step - confirm.
    datestr = datestr.translate(str.maketrans("1234567890", '1234567890'))
    # Convert kanji numerals to ASCII digits ("元", first year, maps to 1).
    datestr = datestr.translate(str.maketrans("一二三四五六七八九〇元", '12345678901'))
    # Upper-case ASCII letters.
    datestr = datestr.upper()
    datestr = datestr.translate(str.maketrans(".-", '//'))

    gengou_label = datestr[0:2]
    gengou = next((g for g in DateHelper.GENGOUS if g['key'] == gengou_label), None)
    m = REMatcher()
    if gengou:
        # Japanese era date
        datestr = datestr.translate(str.maketrans("年月日", '///'))
        key = gengou['key']
        if m.match(r"^({0})(\d{{1,2}})\/(\d{{1,2}})\/(\d{{1,2}})".format(key), datestr):
            # NOTE(review): the other branches pass int(m.group(2)) here.
            syyyy = DateHelper.wareki2yyyy(m.group(1), m.group(2))
            # Month/day come from the matcher ``m``; the ``re`` module has
            # no ``group`` function.
            dfrom = dto = Date(syyyy, int(m.group(3)), int(m.group(4)))
        elif m.match(r"^({0})(\d{{1,2}})\/(\d{{1,2}})".format(key), datestr):
            syyyy = DateHelper.wareki2yyyy(m.group(1), int(m.group(2)))
            dfrom = SimpleDate(syyyy, m.group(3)).to_date()
            dto = SimpleDate(syyyy, m.group(3)).end_of_month()
        elif m.match(r"^({0})(\d{{1,2}})".format(key), datestr):
            syyyy = DateHelper.wareki2yyyy(m.group(1), int(m.group(2)))
            dfrom = SimpleDate(syyyy).to_date()
            dto = SimpleDate(syyyy).end_of_year()
        else:
            # Era name only: use the whole era.
            dfrom = Date.strptime(gengou['from'], '%Y%m%d')
            dto = Date.strptime(gengou['to'], '%Y%m%d')
    elif m.match(r"^\d{4}", datestr):
        # Gregorian date
        datestr = datestr.translate(str.maketrans("年月日", '///'))
        if m.match(r"^(\d{4})\/(\d{1,2})\/(\d{1,2})", datestr):
            dfrom = dto = SimpleDate(m.group(1), m.group(2), m.group(3)).to_date()
        elif m.match(r"^(\d{4})\/(\d{1,2})", datestr):
            dfrom = SimpleDate(m.group(1), m.group(2)).to_date()
            dto = SimpleDate(m.group(1), m.group(2)).end_of_month()
        elif m.match(r"^(\d{4})", datestr):
            dfrom = SimpleDate(m.group(1)).to_date()
            dto = SimpleDate(m.group(1)).end_of_year()
        else:
            # Was the Ruby-style "#{datestr}", which Python never interpolates.
            print("format error (3) {0}".format(datestr))
    else:
        print("format error (1) {0}".format(datestr))

    yyyymmdd_from = dfrom.strftime("%Y%m%d") if dfrom else None
    yyyymmdd_to = dto.strftime("%Y%m%d") if dto else None
    return yyyymmdd_from, yyyymmdd_to
def stateFor(re):
    """Pick the entry of ``teststate`` selected by group 1 of the match ``re``.

    Returns ``teststate[1]`` when group 1 is truthy, otherwise
    ``teststate[0]``.
    """
    return teststate[1] if re.group(1) else teststate[0]
def shape_element(element):
    """Turn an OSM ``node``/``way`` XML element into a plain dict.

    Returns ``None`` for any other top-level tag. Otherwise the dict holds:

    * ``type``: the tag name.
    * ``pos``: ``[lat, lon]`` as floats, from the ``lat``/``lon`` attributes.
    * ``created``: the attributes listed in ``CREATED``.
    * every other attribute as a regular key/value pair.
    * ``address``: ``addr:<x>`` tags, keyed by ``<x>``; tags whose ``<x>``
      itself matches ``lower_colon`` are ignored.
    * every tag key not matching ``lower_colon`` as a regular key/value pair
      (keys matching ``problemchars`` and non-``addr`` colon keys are
      dropped).
    * ``node_refs``: the ``ref`` of every ``<nd>`` child.
    """
    if element.tag not in ("node", "way"):
        return None

    node = {"type": element.tag}

    for attrib, attrib_value in element.attrib.items():
        if attrib == 'lat':
            node.setdefault('pos', [None, None])[0] = float(attrib_value)
        elif attrib == 'lon':
            node.setdefault('pos', [None, None])[1] = float(attrib_value)
        elif attrib in CREATED:
            node.setdefault('created', {})[attrib] = attrib_value
        else:
            node[attrib] = attrib_value

    for tag in element.iter("tag"):
        key = tag.attrib["k"]
        # Keys with problematic characters are ignored in every case.
        if problemchars.match(key):
            continue

        colon_match = lower_colon.match(key)
        if not colon_match:
            node[key] = tag.attrib["v"]
            continue

        # Colon-separated keys other than addr:* are discarded.
        if colon_match.group(1) != 'addr':
            continue

        if "address" not in node:
            node["address"] = {}
        addr_key = colon_match.group(3)
        # A second ':' separates the type/direction of a street: ignore.
        if lower_colon.match(addr_key):
            continue
        node["address"][addr_key] = tag.attrib["v"]

    for nd in element.iter("nd"):
        node.setdefault("node_refs", []).append(nd.attrib["ref"])

    return node
re.split(pattern, string, maxsplit=0)
# Проводит разбиение строки string на основе разделителя регулярного
# выражения pattern и в случае успеха возвращает список полученных
# фрагментов; выполняется не более чем maxsplit разбиений
# (по умолчанию разбиение проводится по всем вхождениям)
re.sub(pattern, repl, string, count=0)
# Заменяет все вхождения шаблона регулярного выражения pattern в
# строке string подстрокой repl, выполняя подстановку вместо всех
# вхождений, если не задан параметр count (см. также функцию subn(),
# которая, кроме этого, возвращает количество выполненных подстановок)
re.purge()
# Очищает кеш неявно скомпилированных шаблонов регулярного выражения
# Следующие три метода принадлежат объекту сопоставления (match),
# который возвращают match() и search(), а не модулю re:
match.group(num=0)
# Возвращает всё сопоставление (или конкретную подгруппу num)
match.groups(default=None)
# Возвращает все сопоставленные подгруппы в виде кортежа (если
# сопоставления отсутствуют, кортеж пуст)
match.groupdict(default=None)
# Возвращает словарь, содержащий все согласованные именованные
# подгруппы, в котором ключами являются имена подгрупп (если
# сопоставления отсутствуют, словарь пуст)
# :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
re.I, re.IGNORECASE
import re

import requests

# Fetch the puzzle page and print the text of its id="desc" heading.
c = requests.get('http://blackfoxs.org/radar/puzzle/', headers={'user-agent': '9e9'})
# Keep the match in its own name: assigning it to ``re`` would replace the
# module for the rest of the script.
match = re.search('id="desc">(.*?)</h2>', c.text)
if match is None:
    raise SystemExit('description element not found in the response')
result = match.group(1)
print(result)
input()
import re

# Print the first "python"/"perl" occurrence found on each line of the file.
# The matched text lives on the match object returned by re.search(); the
# ``re`` module itself has no ``group`` function.
with open("techinfo.txt") as fh:
    for line in fh:
        match = re.search("python|perl", line)
        if match:
            print(match.group())
import re

p = re.compile(r'\W+')
result = p.split("This is my first split example string")
print(result)

DOB = "25-01-1991#This is Date of Birth"

# Delete Python-style comments
Birth = re.sub(r'#.*$', "", DOB)
print("Date of Birth : ", Birth)

# Remove anything other than digits
Birth1 = re.sub(r'\D', "", Birth)
print("Before substituting DOB : ", Birth1)

# Substituting the '-' with '.(dot)'
New = re.sub(r'\W', ".", Birth)
print("After substituting DOB: ", New)

a = re.compile(r'\W+')
# group() is a method of the match object returned by search()/match(), not
# of the ``re`` module: search with the compiled pattern first.
m = a.search("string goes here")
print(m.group())
self._do_work()
steps_regex = re.compile(r"Step (\w).*step (\w)")
with open(INPUT) as f:
    data = [x.strip() for x in f.readlines()]

# Parse the input: each line names a step (group 1) and the step that
# follows it (group 2).
graph = defaultdict(list)   # step -> steps that come after it
prereq = defaultdict(list)  # step -> steps that must come before it
origin = set()
target = set()
for line in data:
    # Keep the match in its own name: binding it to ``re`` would replace the
    # module that ``re.compile`` above depends on.
    match = steps_regex.match(line)
    f = match.group(1)
    t = match.group(2)
    origin.add(f)
    target.add(t)
    graph[f].append(t)
    prereq[t].append(f)

# nodes that are not targets
starter = origin - target

# part 1
available = list(starter)
result = ""
while available:
    available.sort()
    node = available.pop(0)
# sub: substitution - strip every run of digits from a string
strs = '34ibikalksajGDSOPX682j'
strs = re.sub(r'\d+', '', strs)
print(strs)
print()

# Song title of every <li> node, first approach: one findall with groups
results = re.findall(r'<li.*?>\s*?(<a.*?>)?(\w+)(</a>)?\s*?</li>', html, re.S)
for result in results:
    print(result[1])
print()

# Song title of every <li> node, second approach: remove the <a> tags with
# sub first, then capture the content of each <li>
s = re.sub('<a.*?>|</a>', '', html)
fs = re.findall('<li.*?>(.*?)</li>', s, re.S)
for k in fs:
    print(k.strip())  # strip surrounding spaces and newlines
print()

# Goal: match the entry "齐秦 往事随风", found via its class=active marker.
# re.S makes '.' match every character including newlines, so the pattern
# does not need explicit \s for whitespace.
#   .    matches any character except a newline
#   *    repeats the preceding atom zero or more times ({0,})
#   .*?  is the non-greedy form
# Keep the match in its own name: assigning it to ``re`` would replace the
# module for the rest of the script.
match = re.search('li.*?active.*?singer="(.*?)">(.*?)</a>', html, re.S)
print(match)
if match:
    print(match.group(1))
    print(match.group(2))
print()
with xlsxwriter.Workbook(xlsx_file, {'default_date_format': 'yyyy-mm-dd hh:mm:ss'}) as workbook:
    worksheet = workbook.add_worksheet()
    row_no = 0
    for filename in filenames:
        print(filename)
        with open(filename) as f:
            try:
                data = json.load(f)
            except JSONDecodeError as e:
                raise RuntimeError(f"For file {filename}") from e

        # Argument to put in the first column, if timestamp convert to date.
        first_field_value = filename
        # A backslash followed by digits: the digits are read as a timestamp.
        # The captured group is read from the match object; the ``re`` module
        # itself has no ``group`` function.
        timestamp_match = re.search(r'\\(\d+)[^\\]*', first_field_value)
        if timestamp_match:
            try:
                first_field_value = datetime.datetime.fromtimestamp(int(timestamp_match.group(1)))
            except (ValueError, OverflowError, OSError):
                # Not a representable timestamp: keep the file name.
                pass

        if first:
            # Identify a column for every key.
            column_for["_"] = len(column_for)
            for row in data:
                for key in row:
                    if not key in column_for:
                        column_width = []
                        column_for[key] = len(column_for)
            column_width = [0] * len(column_for)
            #