def parse_dates(dates):
    '''Convert a string of calendar items into a list of datetime objects.

    Each item looks like "MM/DD/YYYY" optionally followed by "HH:MM am/pm"
    (separators may be any of / \\ . -); items may repeat arbitrarily many
    times within *dates*.
    '''
    # Date component: month, day, year (note the regex captures month FIRST)
    rx_date = r'(?P<month>\d{1,2})[/\\\.-](?P<day>\d{1,2})[/\\\.-](?P<year>\d{2,4})'
    # Time component: 12-hour clock with an am/pm segment
    rx_time = r'(?P<hour>\d{1,2}):(?P<minute>\d{1,2})\s?(?P<segment>[aApP][mM])'
    # Complete pattern for one calendar item: a date with an optional time
    rx_complete = r'(?P<cal_item>(?P<date>\d{1,2}[/\\\.-]\d{1,2}[/\\\.-]\d{2,4})(?:\s+(?P<time>\d{1,2}:\d{1,2}\s?[aApP][mM]))?)'
    result = []
    for item in re.finditer(rx_complete, dates):
        date = re.match(rx_date, item.group('date'))
        year = int(date.group('year'))
        month = int(date.group('month'))
        day = int(date.group('day'))
        if item.group('time'):
            time = re.match(rx_time, item.group('time'))
            hour = int(time.group('hour'))
            minute = int(time.group('minute'))
            segment = time.group('segment').lower()
            # BUG FIX: the old code added 12 to EVERY pm hour, so "12:30 pm"
            # became hour 24 (ValueError) and "12:30 am" stayed at 12
            # instead of midnight.  Standard 12h -> 24h conversion:
            if segment == 'pm' and hour != 12:
                hour += 12
            elif segment == 'am' and hour == 12:
                hour = 0
            result.append(datetime.datetime(year, month, day, hour, minute))
        else:
            result.append(datetime.datetime(year, month, day))
    return result
def rightNow(city):
    """Build a current-weather report string for *city*.

    Looks the city code up in CITY, queries the live-weather API and formats
    weather / temperature / wind / humidity plus a report timestamp.
    Returns None when the city is unknown or the API reports no data.
    """
    code = CITY.get(city)
    if code is None:
        return None
    payload = requests.get(URL.format(code, KEY, 'base')).json()
    if payload['count'] == '0':
        return None
    live = payload['lives'][0]
    weather = live['weather'] + IMAGES.get(live['weather'])
    temperature = live['temperature'] + "℃"
    wind = live['winddirection'] + "风 " + live['windpower'] + "级"
    humidity = live['humidity'] + "%"
    # Reformat "YYYY-MM-DD HH:MM..." into "M月D日 HH:MM实况"
    stamp = re.search("^\d+-(\d+)-(\d+)\s(\d+:\d+)", live['reporttime'])
    stamp = stamp.group(1) + "月" + stamp.group(2) + "日 " + stamp.group(3) + "实况"
    lines = [city,
             stamp,
             "天气:" + weather,
             "温度:" + temperature,
             "风力:" + wind,
             "湿度:" + humidity]
    return "\n".join(lines)
def get_stock_value(time):
    """Look up stock quotes around a tweet's timestamp.

    *time* is a ctime-like string ("Dow Mon DD HH:MM:SS ..."); the hour is
    shifted by +6 (timezone adjustment to market time — TODO confirm) and
    clamped to 13:30:00 after 19:00.  Quotes are read from
    ``stock_value.csv`` (columns Hour, Minute, Quote).

    Returns (quotes 10 minutes later, adjusted [h, m, s], quotes at the
    adjusted time) — the quote entries are numpy arrays.
    """
    m = re.search(r'(.*?) (.*?) (.*?) (.*?):(.*?):(.*?) (.*?)$', time,
                  re.M | re.I | re.U)
    time_tweet = [int(m.group(4)) + 6, int(m.group(5)), int(m.group(6))]
    # After-hours tweets fall back to a fixed 13:30:00 reference point
    if time_tweet[0] > 19:
        time_tweet = [13, 30, 00]
    stock = pd.read_csv('stock_value.csv')
    # Seconds since midnight for the tweet time and ten minutes later.
    # (The original computed this sum twice and kept a no-op
    # ``len(stock_value1)`` statement; both removed.)
    seconds = time_tweet[0] * 3600 + time_tweet[1] * 60 + time_tweet[2]
    time_fin1 = [seconds // 3600, (seconds % 3600) // 60, seconds % 60]
    later = seconds + 10 * 60
    time_final = [later // 3600, (later % 3600) // 60, later % 60]
    stock_value1 = stock.loc[(stock['Hour'] == time_fin1[0])
                             & (stock['Minute'] == time_fin1[1])]['Quote'].values
    stock_value = stock.loc[(stock['Hour'] == time_final[0])
                            & (stock['Minute'] == time_final[1])]['Quote'].values
    return stock_value, time_tweet, stock_value1
def parse_schedule(s, pat=re.compile(r"(\d{1,2}):(\d{2})")):
    """Parse an "H:MM"/"HH:MM" string into ``{'hour': int, 'minute': int}``.

    When *s* does not match, a warning is logged and *default_schedule*
    (module-level) is parsed instead.
    """
    match = pat.match(s)
    if match is None:
        log.warn(
            "Invalid time format for BACKUP_DELETE_SCHEDULE: %s - using default schedule at %s",
            s, default_schedule)
        match = pat.match(default_schedule)
    hour, minute = match.groups()
    return {'hour': int(hour), 'minute': int(minute)}
def format_time(self, time):
    """Return "<date> HH:MM" for a notice-time regex match.

    *time* is a match object whose groups 4 and 5 hold hour and minute
    (presumably digit strings — single digits are zero-padded); the date
    part comes from self.format_notice_time.
    """
    date = self.format_notice_time(time)
    hour = time.group(4).zfill(2)
    minute = time.group(5).zfill(2)
    return '{} {}:{}'.format(date, hour, minute)
def get_kek(dest):
    """Scrape the KEK radiation-dose page and record the reading on *dest*.

    The dose value is the first number after a <b> tag; the timestamp is
    parsed from a "(YYYY-MM-DD HH:MM" fragment.  Returns *dest*.
    """
    page = urllib2.urlopen('http://rcwww.kek.jp/norm/dose.html').read()
    dose = float(re.search(r'<b>\s*([\d.]+)', page).group(1))
    stamp = re.search(r'\((?P<year>\d{4})-(?P<mon>\d{1,2})-(?P<day>\d{1,2}) '
                      '(?P<hour>\d{1,2}):(?P<min>\d{1,2})', page)
    when = datetime.datetime(*(int(stamp.group(g))
                               for g in ('year', 'mon', 'day', 'hour', 'min')))
    dest.set_value(when, 3, dose)
    return dest
def get_kek(dest):
    # NOTE(review): this is a near-identical duplicate of another get_kek
    # definition in this file; the later definition wins at import time.
    # Consider removing one copy.
    #
    # Fetch the KEK radiation-dose page, parse the dose value and its
    # timestamp, and record them on *dest* via dest.set_value(...).
    # Returns *dest* for chaining.
    url = 'http://rcwww.kek.jp/norm/dose.html'
    html = urllib2.urlopen(url).read()
    # First bold number on the page is the dose reading
    value = re.search(r'<b>\s*([\d.]+)', html).group(1)
    # Timestamp appears as "(YYYY-MM-DD HH:MM" — the two adjacent string
    # literals are concatenated into one pattern
    time = re.search(
        r'\((?P<year>\d{4})-(?P<mon>\d{1,2})-(?P<day>\d{1,2}) '
        '(?P<hour>\d{1,2}):(?P<min>\d{1,2})', html)
    dest.set_value(
        datetime.datetime(int(time.group('year')), int(time.group('mon')),
                          int(time.group('day')), int(time.group('hour')),
                          int(time.group('min'))), 3, float(value))
    return dest
def __CheckFlows(self, line, Flows):
    # Scan *line* against every configured flow pattern and record flow
    # transitions (timestamped) in self.__CameraFlows.
    #
    # *Flows* maps flow name -> ConfigFileSplitSym-separated regex list; a
    # line matching any pattern of a flow belongs to that flow.
    if debugLog >= debugLogLevel[-1]:
        print 'CheckFlow: ' + line, Flows
    for key, values in Flows.items():
        for i in values.split(ConfigFileSplitSym):
            pattern = re.compile(i)
            m = re.search(pattern, line)
            if m:
                if debugLog >= debugLogLevel[2]:
                    print 'Find Flows: ' + i + '\n' + line
                # Log lines are expected to start with "MM-DD HH:MM:SS"
                timeFormat = re.compile(r'\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}')
                time = re.match(timeFormat, line)
                # NOTE(review): if the line carries no leading timestamp,
                # time is None and time.group() below raises AttributeError
                # — confirm all matching lines are timestamped.
                if self.__CameraFlowStep != key:
                    if debugLog >= debugLogLevel[2]:
                        print 'Change Flow ' + key + ' to ' + self.__CameraFlowStep
                    self.__FlowsNum += 1
                    self.__CameraFlows.append(time.group() + ' ' + key + '\n')
                    self.__CameraFlowStep = key
                self.__CameraFlowsLog.append(line)
                if debugLog >= debugLogLevel[2]:
                    print 'Not Finish ' + key + '\n'
def match(self, str):
    """ Extract time
        Ex line:
        168.235.196.131 [24/Oct/2016:00:02:40 -0700] 0.000 https .com "GET /staticx/udemy/css/fancybox_overlay.png HTTP/1.1" 404 162
        Will extract 24/Oct/2016:00:02:40, concatenate hour and minute
        (02:40 -> 0240), typecast to int, and run it through self.statement
        against self.timer.
        Returns None when the line carries no timestamp.
        Args:
            str (:str): Single line
    """
    found = re.search(r'[0-9]{4}:[0-9]{2}:[0-9]{2}:[0-9]{2}', str)
    if not found:
        return None
    # Pieces are (year, hour, minute, second); only hour+minute matter here
    _, hour, minute, _ = found.group().split(':')
    # conditional using two numbers (02:40 > 01:34 -> 240 > 134)
    return self.statement(int(hour + minute), self.timer)
def run(self):
    # Run iperf against self.target for 300 s while pinging it in parallel,
    # then scan the ping output: fail (return 1) if any ICMP round trip
    # exceeded 2000 ms or the target was ever unreachable.  Returns iperf's
    # nonzero exit code if the transfer itself failed, else 0/1.
    if self.iperf3:
        iperf_cmd = 'timeout -k 1 320 iperf3 -c {} -t 300'.format(
            self.target)
    else:
        iperf_cmd = 'timeout -k 1 320 iperf -c {} -t 300'.format(
            self.target)
    print("Running iperf...")
    iperf = subprocess.Popen(shlex.split(iperf_cmd))
    # Ping runs for the whole duration of the iperf transfer and is
    # terminated once iperf finishes
    ping_cmd = 'ping -I {} {}'.format(self.interface, self.target)
    ping = subprocess.Popen(shlex.split(ping_cmd), stdout=subprocess.PIPE)
    iperf.communicate()
    ping.terminate()
    (out, err) = ping.communicate()
    if iperf.returncode != 0:
        return iperf.returncode
    print("Running ping test...")
    result = 0
    # Integer milliseconds out of "time=NNN" in each ping output line
    time_re = re.compile('(?<=time=)[0-9]*')
    for line in out.decode().split('\n'):
        time = time_re.search(line)
        if time and int(time.group()) > 2000:
            print(line)
            print("ICMP packet was delayed by > 2000 ms.")
            result = 1
        if 'unreachable' in line.lower():
            print(line)
            result = 1
    return result
def handle_data(self, data):
    # HTMLParser callback: route text nodes into the news object being
    # built, based on which tag was last opened and the parser's page flags.
    if data.strip():  # skip whitespace-only text nodes
        if self.lasttag == 'h1' and self.iftitle and self.ifFirst:  # title
            #print("Encountered title :", data)
            self.news.title = data
        if self.iftime and self.ifFirst:  # publish time
            time = re.search(re_time, data)
            if time:
                #print("Encountered time :", time.group())
                self.news.time = time.group()
        if self.lasttag == 'p' and self.iftext:  # body text
            pageinfo = re.search(re_page, data)
            if pageinfo:
                if self.ifFirst:
                    # On the first page, read the page-count information
                    num = re.search(re_pagenum, pageinfo.group())
                    # NOTE(review): stores the LENGTH of the matched digit
                    # string, not its numeric value — confirm intended.
                    self.pageNum = num.group().__len__()
                    #print("Encountered pageinfo :", self.pageNum)
            else:
                #print("Encountered text :", data)
                if self.news.text == None:
                    self.news.text = data
                else:
                    self.news.text += data
def Movie(movieName):
    # Query the imdbapi.com JSON endpoint for *movieName* and append
    # formatted fields to the module-level movieInfo list, which is
    # returned.  NOTE(review): a second, near-identical Movie definition
    # appears later in this file and shadows this one at import time.
    cookieJar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookieJar))
    url = "http://www.imdbapi.com/?t=" + movieName
    request = urllib2.Request(url)
    page = opener.open(request)
    # This is one big string
    rawdata = page.read()
    # Each field is scraped out of the raw JSON text with a regex and, when
    # present, inserted at a fixed slot in movieInfo.
    title = re.search('"Title":"(.+?)"', rawdata)
    if title:
        titleOut = title.group(1)
        movieInfo.insert(0, "Title: " + titleOut)
    year = re.search('"Year":"(.+?)"', rawdata)
    if year:
        yearOut = year.group(1)
        movieInfo.insert(1, "Year: " + yearOut)
    time = re.search('"Runtime":"(.+?)"', rawdata)
    if time:
        timeOut = time.group(1)
        movieInfo.insert(2, "Runtime: " + timeOut)
    rate = re.search('"imdbRating":"(.+?)"', rawdata)
    if rate:
        rateOut = rate.group(1)
        movieInfo.insert(3, "Rating: "+ rateOut)
    error = re.search('"Error":"(.+?)"', rawdata)
    if error:
        errorOut = error.group(1)
        movieInfo.insert(4, errorOut)
    return movieInfo
def Movie(movieName):
    """Query imdbapi.com for *movieName* and append formatted fields to the
    module-level movieInfo list, which is returned."""
    cookieJar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookieJar))
    request = urllib2.Request("http://www.imdbapi.com/?t=" + movieName)
    # This is one big string
    rawdata = opener.open(request).read()
    # (pattern, slot, prefix) for each field scraped from the raw JSON text
    fields = (
        ('"Title":"(.+?)"', 0, "Title: "),
        ('"Year":"(.+?)"', 1, "Year: "),
        ('"Runtime":"(.+?)"', 2, "Runtime: "),
        ('"imdbRating":"(.+?)"', 3, "Rating: "),
        ('"Error":"(.+?)"', 4, ""),
    )
    for pattern, slot, prefix in fields:
        hit = re.search(pattern, rawdata)
        if hit:
            movieInfo.insert(slot, prefix + hit.group(1))
    return movieInfo
def parseAvgTime(line):
    """Pull the average-time number out of *line*.

    Matches the module-level TIME_REGEX, splits the hit with
    TIME_REGEX_SPLIT and, when the split yields exactly two pieces, returns
    the second piece as a float.  Returns None otherwise.
    """
    found = re.search(TIME_REGEX, line)
    if found is None:
        return None
    pieces = TIME_REGEX_SPLIT.split(found.group(0))
    return float(pieces[1]) if len(pieces) == 2 else None
def latencia(host='127.0.0.1'):
    """Measure QoS latency by pinging *host* once.

    Returns the reported round-trip time in milliseconds as a float, or 0.0
    when the ping fails or its output carries no "time=" field.
    """
    # Pick the ping invocation per platform, then parse both the same way.
    # (The original duplicated the parsing in each branch and left a stray
    # debug print of the raw match object; both removed.)
    if platform.system() == "Linux":
        command = "ping -c1 %s " % host
    else:
        # NOTE(review): this branch inherits Linux-style flags; on Windows
        # the options would be -n/-w and "> nul" does nothing here —
        # confirm the intended non-Linux platforms.
        command = "ping -c1 -w2 %s > nul" % host
    status, result = sp.getstatusoutput(command)
    time = re.search('time=([0-9]{1,4}\.[0-9]{1,4})', result)
    if time:
        return float(time.group(1))
    return 0.0
def write(self, msg):
    """Insert a log line into the Unit table via self.csr.

    Only lines carrying both an "MM-DD HH:MM:SS.ffffff" timestamp and a
    recognisable tag are stored; anything else is silently dropped.
    """
    stamp = re.search(r'\d+-\d+\s+\d+:\d+:\d+\.\d+', msg)
    tag = re.search(r'\w\s\w+\s*:\s+|\[\d+: .*\]', msg)
    if not (stamp and tag):
        return
    when = datetime.datetime.strptime(stamp.group(), '%m-%d %H:%M:%S.%f')
    self.csr.execute('INSERT INTO Unit VALUES (?,?,?)',
                     (when, tag.group(0), msg))
def get_time2(times):
    """Return the scaled distance from now to the nearest "HH:MM" in *times*.

    For each time string the closest occurrence (yesterday, today or
    tomorrow) is considered; the smallest gap in seconds is normalised by
    12 hours (43200 s), giving a value in [0, 1].
    """
    from datetime import timedelta
    tfactor = 1.0
    curr = datetime.now()
    feat = 43200.0
    for t in times:
        m = re.match(r'(\d+):(\d+)', t)
        today = datetime(curr.year, curr.month, curr.day,
                         int(m.group(1)), int(m.group(2)))
        # BUG FIX: the original built datetime(..., day - 1) / (day + 1)
        # directly, which raises ValueError on the first/last day of a
        # month; timedelta arithmetic handles month/year boundaries.
        nearest = min(abs(candidate - curr)
                      for candidate in (today,
                                        today - timedelta(days=1),
                                        today + timedelta(days=1)))
        # nearest is at most 12 h, so .days is 0 and .seconds is the gap
        if nearest.seconds < feat:
            feat = nearest.seconds
    return feat / 43200.0 / tfactor
def timeParamIdx(input: str) -> int:
    """Extract the integer value of a "time=<N>ms" parameter from *input*.

    Returns the millisecond count, or -1 when no time parameter (or an
    empty one) is present.
    """
    time = re.search(r"time=(\d*)ms", input)
    if time:
        digits = time.group(1)
        # BUG FIX: group(1) is a string, but the annotated return type is
        # int and callers compare against the -1 sentinel; convert (and
        # treat a digit-less "time=ms" as not found).
        return int(digits) if digits else -1
    return -1
def get_dt(self, fn):
    """Split a filename like "..._YYYYMMDD_HHMM.ext" into date, time and
    combined date-time substrings.

    Returns (date, time, dt) where the date keeps its leading underscore
    (e.g. ('_20200101', '1234', '20200101_1234')); returns None when any
    of the three patterns fails to match.
    """
    date_match = re.search(r'_\d{1,8}(?=_\d{1,4}\.)', fn)   # ('\d{8}(?=_\d{4}\.)')
    time_match = re.search(r'(?<=\d_)\d{4}(?=\.)', fn)      # '(?<=\d{8}_)\d{4}(?=\.)'
    dt_match = re.search(r'\d{1,8}_\d{1,4}(?=\.)', fn)      # '\d{8}_\d{4}(?=\.)'
    if date_match and time_match and dt_match:
        return date_match.group(), time_match.group(), dt_match.group()
def text_data(text_lines):
    """Parse the header lines of a measurement text file.

    text_lines[2] carries the trial name ("Roman_..." or "Accuracy..."),
    an HH:MM:SS time and a YYYY-MM-DD date; text_lines[3] the sample rate;
    text_lines[4] the trial length.

    Returns (name, time, date, trial_len, measure_rate) where measure_rate
    is a float and the rest are the matched strings.
    """
    # BUG FIX: the rate pattern used an unescaped '.', so e.g. "12x5"
    # would have matched; require a literal decimal point.
    measure_rate = re.search(r'\d+\.\d+', text_lines[3]).group()
    measure_rate = float(measure_rate)
    name = re.search(r'Roman_\w+', text_lines[2])
    if name is None:
        name = re.search(r'Accuracy\w+', text_lines[2])
    time = re.search(r'\d+:\d+:\d+', text_lines[2])
    date = re.search(r'\d+-\d+-\d+', text_lines[2])
    trial_len = re.search(r'\d+\.\d+', text_lines[4])
    return name.group(), time.group(), date.group(), trial_len.group(), measure_rate
def analyze(read):
    # Classify a to-do line.  Lines ending in " - HH:MM\n" are "timed":
    # return (text-before-the-dash + "\n", "HH:MM").  Anything else returns
    # (read, "OOPS").
    topic = "OOPS"
    if re.match('.* - [0-2][0-9]:[0-5][0-9]\n', read):
        print "Timed: " + read
        # Text before the " - HH:MM" suffix (lookahead keeps the dash out)
        found = re.search('.* (?=- [0-2][0-9]:[0-5][0-9])', read)
        time = re.search('[0-2][0-9]:[0-5][0-9]', read)
        topic = found.group(0) + "\n"
    if topic == "OOPS":
        return read, topic
    else:
        return topic, time.group(0)
def parse_time_detail(self, time):
    """ Returns minute of detail (INT) """
    # NOTE(review): despite the docstring, this returns the minute as a
    # string (or None for an empty cell) — callers apparently int() it.
    #
    # The original guarded on ``type(time.groups()) != type(())`` — a
    # condition that can never be true since groups() always returns a
    # tuple; the only real failure mode is the regex not matching at all
    # (match is None -> AttributeError -> "").  Check for that directly.
    match = re.search("<td class=\"timeCol\">((?P<time>[\d]{1,3})\')?</td>", time)
    if match is None:
        return ""
    # May be None when the cell is present but empty — same as before.
    return match.group('time')
def getDates(text):
    """Extract "<time> <date>" strings from a bridge-opening announcement.

    Searches the sentence between "...scheduled to open at" and "to allow",
    pairing every time (e.g. "10 a.m." / "10:30 p.m.") with the first date
    ("Tuesday, Jan. 5") that follows it.
    """
    window = re.search('The Fore River Bridge is scheduled to open at(.+?)to allow', text)
    time_pattern = re.compile(r'(\d{1,2} a.m.?|\d{1,2} p.m.?|\d{1,2}:\d{2} a.m.?|\d{1,2}:\d{2} p.m.?)')
    dates = []
    if window is None:
        return dates
    sentence = window.group(1)
    start_at = 0
    for date in re.finditer(r'(\w+, \w+\.* [0-9]{1,2})', sentence):
        # Only times between the previous date and this one pair with it
        dates.extend(time.group(1) + " " + date.group(1)
                     for time in time_pattern.finditer(sentence,
                                                       pos=start_at,
                                                       endpos=date.span()[0]))
        start_at = date.span()[1]
    return dates
def __do_analysis(self, event):
    """
    Start analysis: read the selected log file, extract per-request cost
    samples for the chosen command, accumulate statistics and plot them.
    :param event: GUI event from the analyse control
    :return:
    """
    if self.fileText.GetValue() == '':
        self.warnLabel.SetLabel('请选择日志!!')
        return
    if self.cmd not in comands.cmds:
        self.warnLabel.SetLabel('请选择命令!!')
        return
    output = FileTool.readObject(self.fileText.GetValue())
    # Series 1: "post request cost" samples
    x1 = []
    y1 = []
    statisticInfo1 = StatisticInfo('post request cost:')
    # Series 2: total elapsed ("耗时") samples
    x2 = []
    y2 = []
    statisticInfo2 = StatisticInfo('sum cost')
    p1 = 'post_request_cost:.*:{}:'.format(self.cmd)
    pattern1 = '{}\d+'.format(p1)
    p2 = '耗时:{}:'.format(self.cmd)
    pattern2 = '{}\d+'.format(p2)
    # Every log line is expected to start with "[HH:MM:SS:mmm]"
    time_pattern = '^\\[\d{2}:\d{2}:\d{2}:\d{3}\\]'
    for line in output:
        time = re.match(time_pattern, line)
        if time is None:
            continue
        # Strip the square brackets, then convert via self.format_date
        # (presumably to a numeric timestamp — confirm)
        timestr = re.sub('[\\[\\]]', '', time.group(0))
        mlis = self.format_date(timestr)
        # A line contributes to exactly one series (note the continue)
        st = re.search(pattern1, line)
        if st is not None:
            t = re.sub(p1, '', st.group(0))
            x1.append(mlis)
            y1.append(int(t))
            statisticInfo1.count(int(t))
            continue
        st1 = re.search(pattern2, line)
        if st1 is not None:
            t = re.sub(p2, '', st1.group(0))
            x2.append(mlis)
            y2.append(int(t))
            statisticInfo2.count(int(t))
    self.desc.SetLabel(statisticInfo1.printStatisticInfo())
    self.desc1.SetLabel(statisticInfo2.printStatisticInfo())
    self.draw(x1, y1, x2, y2, self.cmd)
async def log(self, ctx, time: str, amount: int = 10, channel: discord.Channel = None):
    # Fetch up to *amount* logged messages newer than now-minus-*time* from
    # the SQLite table named after the channel id, and echo them back in
    # chunks that fit Discord's 2000-character message limit.  *time* is a
    # duration string parsed by the module-level `reg` pattern (groups 2/4/
    # 6/8 are days/hours/minutes/seconds, e.g. "1d2h3m4s").
    time = reg.match(time)
    if channel == None:
        channel = ctx.message.channel

    def none0(val):
        # Regex groups are None when a unit is absent; treat those as 0.
        return int((val, 0)[val == None])

    datedif = datetime.now() - timedelta(days=none0(time.group(2)),
                                         hours=none0(time.group(4)),
                                         minutes=none0(time.group(6)),
                                         seconds=none0(time.group(8)))
    ##await self.bot.say("Time: `{}d{}h{}m{}s` Amt: `{}` Ch: `{}`".format(time.group(2), time.group(4), time.group(6), time.group(8), amount, (channel, "all")[channel == None]))
    ##status = await self.bot.say("Searching logs for messages from all users in {}.".format((channel.mention, "all channels")[channel == "*"]))
    # NOTE(review): table name and timestamp are interpolated directly into
    # the SQL string — safe only because channel.id is numeric; confirm.
    cursor.execute(
        "SELECT * FROM `{}` WHERE `timestamp` >= '{}' ORDER BY `id` ASC LIMIT {}"
        .format(channel.id, datedif, amount))
    data = cursor.fetchall()
    msg = "Done! Found {} messages: ```".format(len(data))
    m = 0
    for entry in data:
        user = discord.utils.get(self.bot.get_all_members(), id=entry[1])
        m += 1
        msg += "\n{} #{} @{} >> \"{}\"".format(entry[4], entry[0], entry[1], entry[3])
    if len(msg) < 1998:
        msg += "```"
    # Send in 2000-char slices, re-opening the code fence on each chunk
    while len(msg) > 6:
        await self.bot.send_message(
            ctx.message.channel,
            msg[0:2000] + ("```", "")[len(msg) < 1998])
        msg = "```{}```".format(msg[1994:])
    # NOTE(review): stray opening triple-quote below — likely a truncated
    # trailing block comment; confirm against the full file.
    """
def parseTweet(tweet):
    """Parse a @triplejplay now-playing tweet.

    Returns a dict with the song, artist, feature artist, the Sydney-
    localised play time, the raw text, created_at and tweet id.
    """
    # https://regex101.com/#python
    # Obviously we are just putting faith in the formatting of @tripleJplay
    songRegex = re.compile(r"(?<=-\W).+?(?=\W\[)")
    artistWithTwitterRegex = re.compile(r'(?<=@).+?(?=\W)')
    # BUG FIX: this pattern was commented out although the fallback branch
    # below still references it, raising NameError whenever the artist has
    # no Twitter handle.
    aristWithoutTwitterRegex = re.compile(r'.+(?=\W-)')
    featureArtistTwitterRegex = re.compile(r'(?<=ft.\W\@).+?(?=\W)')
    timeRegex = re.compile(r'(?<=\[)[0-9]{2}:[0-9]{2}(?=\])')  # (?<=\[).+(?=\])
    #THIS IS VERY SPECIFIC
    song = songRegex.search(tweet.text)
    artist = artistWithTwitterRegex.search(tweet.text)
    feat = featureArtistTwitterRegex.search(tweet.text)
    time = timeRegex.search(tweet.text)
    day = tweet.created_at.day
    month = tweet.created_at.month
    year = tweet.created_at.year
    if song:
        song = song.group()
    else:
        song = "^Song Not Found!^"
    if artist:
        artist = artist.group()
    else:
        artistNoTwitter = aristWithoutTwitterRegex.search(tweet.text)
        if artistNoTwitter:
            artist = artistNoTwitter.group()
        else:
            artist = "!Artist Not Found!"
    if feat:
        feat = feat.group()
    else:
        feat = '*Feature Artist Not Found*'
    if time:
        time = time.group()
    else:
        # NOTE(review): this placeholder flows into the int() parsing below
        # and will raise ValueError when no [HH:MM] tag is present — same
        # behaviour as before, but worth confirming upstream guarantees.
        time = "%Time Not Found!%"
    # use pytz to put the time in sydney time
    sydney = timezone('Australia/Sydney')
    tweet.created_at = timezone('UTC').localize(tweet.created_at)  # Attach UTC time to tweet
    date_object = tweet.created_at.astimezone(sydney)  # Convert to sydney time
    # get the hour and minute from the actual tweet
    date_object = date_object.replace(hour=int(str(time)[:2]),
                                      minute=int(str(time)[3:]))
    return {'song': song, 'artist': artist, 'feat': feat, 'time': date_object,
            'raw': tweet.text, 'created_at': tweet.created_at,
            'id': tweet.id}
def CheckLogs(self, Flows, ErrLogs, KeyWords):
    # Read self.__fd line by line, recording the first and last timestamps
    # seen, and dispatch every line to the flow / error-log / keyword
    # checkers.
    if debugLog >= debugLogLevel[-1]:
        print 'CheckLogs: ', self.__filename
    while 1:
        line = self.__fd.readline()
        if not line:
            # End of file: report the last timestamp and totals
            if debugLog >= debugLogLevel[2]:
                if self.__endTime != '':
                    print 'End Time:', self.__endTime
                print '(INFO) Finish Parse file: ' + self.__filename
                print '(INFO) lines: ', self.__logLines, '\n'
            break
        # Log lines start with "MM-DD HH:MM:SS"
        timeFormat = re.compile(r'\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}')
        time = re.match(timeFormat, line)
        # First timestamped line sets the begin time; later ones update
        # the running end time
        if time and self.__logLines == 0:
            self.__beginTime = time.group()
            if debugLog >= debugLogLevel[2]:
                print 'Begin Time:', self.__beginTime
        else:
            if time:
                self.__endTime = time.group()
        if debugLog >= debugLogLevel[-1]:
            print 'INFO: Read line --->' + line
        self.__CheckFlows(line, Flows)
        self.__CheckErrLogs(line, ErrLogs)
        self.__CheckKeyWords(line, KeyWords)
        self.__logLines += 1
def get_trtime_position(self, grab):
    # Read the first cell of the table's last row: try the bold variant and
    # parse it as "DD.MM.YYYY HH:MM"; fall back to the plain cell text.
    # Either way, print any "HH:MM" found in the text.
    text = ''
    time = ''
    dt = ''
    # [last()]
    # grab.doc.select('//th'):
    try:
        #print grab.doc.select('//tr[last()]').select('./td[1][.//b]').html()
        text = grab.doc.select('//tr[last()]').select('./td[1]/b').text()
        dt = datetime.strptime(text, "%d.%m.%Y %H:%M")
    except:
        # NOTE(review): bare except also swallows unrelated errors from the
        # selector calls — consider narrowing.
        text = grab.doc.select('//tr[last()]').select('./td[1]').text()
    finally:
        time = re.search("(\d{2}:\d{2})", text)
        if time:
            print text, time.group().strip().lower()
def getPost(filename):
    """Read a post file and return [message, time, author zid].

    The file contains "message: ...", "time: <ISO timestamp>+<offset>" and
    "from: z<digits>" lines in any order; the timestamp's 'T' separator is
    replaced by a space and the offset dropped.  Returns None implicitly
    when any of the three fields is missing.
    """
    mes = None
    ti = None
    frm_zid = None
    # BUG FIX: the file handle was never closed; a context manager
    # guarantees cleanup even on early return.
    with open(filename, 'r', encoding='utf-8') as file:
        for line in file:
            message = re.match(r"message:\s(.+)", line)
            time = re.match(r"time:\s(.+)\+", line)
            frm = re.match(r"from:\sz(\d+)", line)
            if message:
                mes = message.group(1)
            if time:
                ti = time.group(1).replace('T', ' ')
            if frm:
                frm_zid = frm.group(1)
            if mes and ti and frm_zid:
                return [mes, ti, frm_zid]
def create_stats():
    # Benchmark every (project, sort algorithm, file size, split count)
    # combination and append results to per-project CSV files under
    # HOME/stats.  Relies on the project-level Files/Project/SortAlgo
    # enums and the call() subprocess wrapper.
    logger = create_logger()
    Files.generate_all(logger)
    # "sort_algo","entry_lines","split","time(sec)","cpu(%)"
    line = '"%s",%d,%d,%s,%s\r\n'
    # Launch stats
    for p in Project:  # each project version
        # Write a fresh CSV header for this project
        stats = open(HOME+"stats/Project_"+p.name+".csv", "a+")
        stats.write('\r\n"sort_algo","entry_lines","split","time(sec)","cpu(%)"\r\n')
        stats.close()
        for alg in SortAlgo:  # each sort algo
            # set algo active
            SortAlgo.set(alg, Project.__file__%(p.value))
            logger.info("SortAlog set %s in %s"%( alg.name, Project.__file__%(p.value) ))
            # compile
            call(MAKE, logger=logger, shell=True)
            for f in Files:  # for each file size
                for i in range(1,11):  # 1 to 10 split
                    # Output name: original name with ".sort" inserted
                    # before the extension
                    tmp = f.value[0].split(".")
                    sorted_file = ".".join(tmp[:-1] + ["sort"] + tmp[-1:])
                    command = CMD%(HOME, Project.__m_option__%(p.value), f.value[0], sorted_file, i)
                    logger.info(command)
                    outs, errs = call(command, logger=logger, is_stats=True, shell=True)
                    # write results of this test
                    res = (outs + errs).decode()
                    # Parse GNU time -v style output for CPU% and elapsed
                    cpu = re.match(r"[^']*Percent of CPU this job got: (?P<cpu>[0-9]{1,4})%[^']*", res)
                    time = re.match(r"[^']*Elapsed \(wall clock\) time \(h:mm:ss or m:ss\): (?P<time>[0-9]?:[0-9]{2}\.[0-9]{2})[^']*", res)
                    if time is not None:
                        time = time.group(1)
                        tmp_time = int(time.split(":")[0]) *60 # minutes
                        time = tmp_time + float(time.split(":")[1]) # seconds
                    cpu = int(cpu.group(1)) if cpu is not None else "err"
                    if time is None:
                        time = "err"
                    elif time is not None and time >= TIMEOUT:
                        time = "inf"
                    # NOTE(review): results go to stats/log/..., while the
                    # header above went to stats/... — confirm intended.
                    stats = open(HOME+"stats/log/Project_"+p.name+".csv", "a+")
                    stats.write(line % (alg.name, f.value[1], i, time, cpu))
                    stats.close()
def customerParseItem(self, response, item, **kwargs):
    """Fill source/publish_time on *item* from the page's info bar.

    The source is the first word of the info link text; the publish time is
    the first "YYYY年MM月DD日" date found among the info text fragments.
    Returns the (mutated) item.
    """
    print('customerParseItem')
    sel = Selector(response=response)
    item["publish_time"] = ''
    item['author'] = ''
    text0 = sel.xpath('//*[@class="info"]/i/a/text()').extract_first()
    if text0:
        item['source'] = text0.split(" ")[0]
    text1 = sel.xpath('//*[@class="info"]/i/text()').extract()
    if text1:
        for i in text1:
            if "年" in i:
                pattern = re.compile('\d{4}年\d{2}月\d{2}日')
                time = re.search(pattern, i)
                # BUG FIX: a fragment can contain "年" without a full date;
                # guard the match so we don't call .group() on None.
                if time:
                    item['publish_time'] = time.group()
    return item
def standard_work_article(target_url):
    # Download a news article page, collect its paragraph texts and return
    # (publish timestamp, "title<replace title>body").
    return_data = []
    headers = {'User-Agent': UserAgent}
    res = requests.get(target_url, headers=headers)
    res.raise_for_status()
    reg_content = res.content.decode('utf8')
    html_page = bs4.BeautifulSoup(reg_content, 'lxml')
    infos = ''
    # Paragraphs live either under .content or under #container_txt
    try:
        infos = html_page.find(class_='content').findAll('p')
    except AttributeError:
        infos = html_page.find(id='container_txt').findAll('p')
    for one_info in infos:
        # Real <p>...</p> elements contribute their text; bare strings are
        # taken as-is, anything else is skipped
        content_dir = re.search('<p[\\s\\S]+/p>', str(one_info))
        if content_dir:
            need_content = one_info.text
        else:
            if isinstance(one_info, bs4.NavigableString):
                need_content = one_info
            else:
                continue
        if not need_content.strip():
            continue
        return_data.append(need_content.strip())
    # Publish time: prefer the <h2> header, else the #pubtime_baidu node
    try:
        time = html_page.find('h2').text
        datetime_dir = re.match(
            '.+:(?P<year>\d{4})-(?P<month>\d+?)-(?P<day>\d+?) (?P<hour>\d+?):(?P<minute>\d+?):.+',
            time)
    except AttributeError:
        time = html_page.find(id='pubtime_baidu').text.strip()
        time = re.search(r"(\d{4}-\d{1,2}-\d{1,2}\s\d{1,2}:\d{1,2}:\d{1,2})", time)
        time = time.group(0)
        datetime_dir = re.match(
            '(?P<year>\d{4})-(?P<month>\d+?)-(?P<day>\d+?) (?P<hour>\d+?):(?P<minute>\d+?):\d+',
            time)
    # NOTE(review): tt_tmp dereferences datetime_dir BEFORE the truthiness
    # check below, so a non-matching header raises TypeError here.
    tt_tmp = '%s-%s-%s %s:%s' % (datetime_dir['year'], datetime_dir['month'],
                                 datetime_dir['day'], datetime_dir['hour'],
                                 datetime_dir['minute'])
    _datetime = 0
    if datetime_dir:
        _datetime = get_time('%Y-%m-%d %H:%M', tt_tmp)
    _title = html_page.find('h1').text
    return _datetime, '%s<replace title>%s' % (_title, '\n'.join(return_data))
def __init__(self, header, message):
    """Parse a "time:level:file:line" header and store the fields plus
    *message* on this mapping.

    Missing trailing fields default to ''; a header without a recognisable
    timestamp gets time '-'.  The raw inputs are kept in self.inp.
    """
    header = header.strip().lstrip('-!>')
    #message = message.strip()
    self.inp = (header, message)
    parts = header.rsplit(':', 3)
    stamp = self._timepat.match(parts.pop(0))
    time = stamp.group() if stamp else '-'
    # Pad so level/file/line always unpack (one or two missing fields)
    if len(parts) == 1:
        parts.append('')
    if len(parts) == 2:
        parts.append('')
    level, file_, line = parts[:3]
    extra = tuple(parts[3:])
    self.update(time=time, level=level, file=file_, line=line)
    self['message'] = message
def get_status(self,user_num):
    # Page through a Renren user's status feed, concatenating the raw
    # responses until a status older than required_time (or page 100) is
    # seen.  Returns the accumulated response text.
    page = 0
    result = "none" # accumulated search results
    time_form = re.compile("dtime\":\".*?\s")
    required_time = "2015-05-29" # cut-off date for the search
    search = 1 # whether to keep paging
    try:
        while search:
            more_result = self.opener.open("http://status.renren.com/GetSomeomeDoingList.do?userId=" + user_num + "&curpage=" + str(page) + "&_jcb=jQuery1111031641900399699807_1433395719580&requestToken=379990670&_rtk=e26730e8&_=1433395719581")
            result = result + more_result.read()
            # NOTE(review): the regex is applied to the response OBJECT
            # (whose body was already consumed by read() above), not to
            # its text — confirm this ever worked as intended.
            time = time_form.search(more_result)
            # Stop once statuses predate required_time, or after 100 pages
            if (cmp(time.group(0)[8:],required_time) == -1) or (page > 100):
                search = 0
            page = page + 1
    finally:
        #fp = open("user/" + user_num + ".txt",'w')
        #fp.write(result)
        #fp.close()
        # NOTE(review): return inside finally swallows any exception
        # raised in the loop above.
        return result
def process_time_step(message):
    """Telegram handler: read the event time from *message*, store it on
    the pending event and ask for the event URL next.

    Accepts "HH:MM" with any of : / - . or space as the separator and
    normalises it to "HH:MM"; re-prompts on invalid input.
    """
    try:
        time = message.text
        if time == 'Отмена':
            cancel_adding_event(message.chat.id)
            return
        chat_id = message.chat.id
        event = event_dict[chat_id]
        time = re.search(r'\d{2}[:|/|-|.| ]\d{2}', time)
        if time is None:
            msg = bot.reply_to(message, 'Введите время в формате (ЧЧ:ММ)')
            bot.register_next_step_handler(msg, process_date_step)
            # BUG FIX: without this return the code below dereferenced the
            # failed match (None), so the user got the generic exception
            # reply on top of the re-prompt.
            return
        event.time = re.sub(r'[:|/|-|.| ]', ':', time.group())
        markup = types.ReplyKeyboardMarkup(one_time_keyboard=True,
                                           resize_keyboard=True)
        markup.add('Нет ссылки', 'Отмена')
        msg = bot.reply_to(message, 'Введите ссылку', reply_markup=markup)
        bot.register_next_step_handler(msg, add_new_event_url)
    except Exception as e:
        print("Exception: " + str(e))
        bot.reply_to(message, 'Введите время в формате (ЧЧ/ММ)')
def writeComment(self, dataList):
    # Write each Zhihu answer/comment in *dataList* to self.fp: upvote
    # count, cleaned body text and post date; then recurse into its
    # sub-comments via the answer id.
    for list in dataList:
        list = str(list)
        soup = BeautifulSoup(list)
        # Total number of upvotes for this comment/answer
        pzanCount = str(soup.find('span', class_='count'))
        # Body of the comment/answer
        pcontent = str(soup.find('div', class_='zm-editable-content'))
        # Time the comment/answer was posted
        ptime = str(soup.find('a', class_='answer-date-link'))
        zanCount = re.search('<span.*?>(.*?)</span>', pzanCount, re.S)
        content = re.search('<div.*?>(.*?)</div>', pcontent, re.S)
        time = re.search('<a.*?>(.*?)</a>', ptime, re.S)
        self.fp.write("赞同的人数:" + zanCount.group(1) + "\n")
        # Strip residual HTML tags from the body text
        contentText = re.sub('<.*?>', '', content.group(1))
        self.fp.write("评论的内容:\t" + contentText + "\n")
        self.fp.write("\t\t" + time.group(1) + "\n\n")
        # Answer id (data-aid) identifies which sub-comments to fetch
        replayID = re.search(
            '<div.*?class="zm-item-answer.*?zm-item-expanded".*?data-aid="(.*?)".*?>',
            list, re.S)
        self.getSubComment(replayID.group(1))
def onHandleTime():
    """Return the first "YYYY-MM-DD HH:MM:SS"-style timestamp found in the
    last-login string.

    NOTE: raises AttributeError when getLastLoginTime() yields no match —
    same as the original behaviour.
    """
    timeString = getLastLoginTime()
    timeRegex = ("\d\d\d\d-(0?[1-9]|1[0-2])-(0?[1-9]|[12][0-9]|3[01]) "
                 "(00|[0-9]|1[0-9]|2[0-3]):([0-9]|[0-5][0-9]):([0-9]|[0-5][0-9])"
                 "([0-9]|[0-5][0-9])")
    return re.search(timeRegex, timeString).group(0)
# Scan each log file in *logs* for lines matching the module-level pattern
# string `strm`, extracting the bracketed timestamp, the user_id= and
# mobile= query parameters from matching lines.
for log in logs:
    print log
    if len(log) < 3:
        break
    fp = open(log)
    while(1):
        line = fp.readline()
        if not line:
            break
        m = re.match(r'.*'+strm+r'.*',line)
        if m:
            # Bracketed "[...]" timestamp, brackets stripped
            time = re.search(r'\[.*\]',line)
            if time:
                time = time.group(0)
                time = re.sub(r'\[|\]','',time)
                #sdata = re.split(r':',time)
                #date = sdata[0]
                #hour = sdata[1]
                #min = sdata[2]
                #second = re.split(r'\s+',sdata[3])[0]
                time_original = re.split(r' +',time)[0]
            # user_id=<digits>& -> bare id
            user = re.search(r'user_id=\d+?&',line)
            if user:
                userid= user.group(0)
                userid = re.split(r'=',userid)[1]
                userid = re.split(r'&',userid)[0]
            # NOTE(review): the fragment appears to end here; the mobile
            # value is matched but (in this view) never unpacked — confirm
            # against the full file.
            mobile = re.search(r'mobile=\d+?&',line)
def isTimeover(self, a, b):
    """Return True when timestamp *a* ("YYYY:MM:DD HH:MM:SS") exceeds *b*
    on some field, comparing zero-padded strings field by field.

    NOTE: like the original, the comparison falls through field-by-field
    rather than hierarchically, so it is only reliable when higher-order
    fields are equal; returns None when no field of *a* exceeds the
    matching field of *b*.
    """
    pattern = r'(.*?):(.*?):(.*?) (.*?):(.*?):(.*)'
    time = re.search(pattern, a)
    year, month, day, hour, min, second = time.group(1, 2, 3, 4, 5, 6)
    # print year, month, day, ":", hour, min, second
    time1 = re.search(pattern, b)
    # BUG FIX: the second timestamp's fields were copy-pasted from `time`,
    # so both sides always compared equal; read them from time1.
    year1, month1, day1, hour1, min1, second1 = time1.group(1, 2, 3, 4, 5, 6)
    if year > year1:
        return True
    # BUG FIX: the original compared month against itself.
    if month > month1:
        return True
    if day > day1:
        return True
    if hour > hour1:
        return True
    if min > min1:
        return True
def parse_Time (tym):
    ''' returns a tuple (standardized time string, list of error strings)
    #
    # Requires:	tym - string; a time (HH:MM PM) where the AM/PM
    #	designation is optional
    # Effects:	parses tym and converts it to the standard WTS time
    #	format (HH:MM PM).  Maintains a list of any errors
    #	encountered in parsing.  Returns a tuple with either:
    #	(time string, None) if no errors, or ('', list of error
    #	strings) if errors were found.
    # Modifies:	no side effects
    '''
    # NOTE(review): this uses the pre-1.5 `regex` module (match returns an
    # offset, group syntax uses escaped parens) and `string.atoi`/`zfill`
    # — all long removed from modern Python; left untouched.
    global START, END
    # we use regular expressions to do our initial examination and
    # splitting of tym into its component pieces.  Set up the regex
    # strings we'll use:
    re_ampm = '\([AP]M\)'			# AM/PM
    re_hhmm = '\([012 ]?[0-9]\):\([0-5][0-9]\)'	# HH:MM
    # now, compile the regular expressions to look for recognizable tym
    # formats (for description, see above):
    timeampm = regex.compile ('.*' + re_ampm)	# is AM/PM anywhere?
    time = regex.compile (START + re_hhmm + END)	# time without AM/PM
    fulltime = regex.compile (START + re_hhmm + '[ \t]*' + re_ampm + END)	# time with AM/PM
    ampm = None		# no AM/PM designation yet
    hours = None	# no hours yet
    minutes = None	# no minutes yet
    # now, try to match tym to one of the formats (one with AM/PM, one
    # without)
    if (timeampm.match (tym) >= 0):
        if (fulltime.match (tym) >= 0):
            (hours, minutes, ampm) = fulltime.group (1, 2, 3)
    elif (time.match (tym) >= 0):
        (hours, minutes) = time.group (1, 2)
    # if hours is still None, then we know it didn't match.  bail out.
    if not hours:
        return ('', [ 'Could not recognize time: %s' % tym ])
    # otherwise, we have no errors yet.  So, get integer values for hours
    # and minutes.  Then error-check the hours.  Because of the regular
    # expression matching [0-5][0-9], we know that minutes must be in the
    # correct range (00-59).
    errors = []
    hours = string.atoi (hours)
    minutes = string.atoi (minutes)
    if (hours < 1) or (hours > 23):
        errors.append ('Hours out of range: %d' % hours)
    # if ampm not yet defined, then set the AM/PM designation and adjust
    # the hours if necessary (convert 24-hour clock to 12-hour)
    if not ampm:
        if (hours >= 12):
            ampm = 'PM'
            if (hours > 12):
                hours = hours - 12
        else:
            ampm = 'AM'
    # otherwise, check that the designation is correct.  Note that a PM
    # designation with hours > 12 is not necessarily incorrect, but rather
    # just redundant.  (so adjust it to reflect a 12-hour clock)
    else:
        if (ampm == 'AM') and (hours > 12):
            errors.append ('AM designation is incorrect for ' + \
                '%d hours' % hours)
        elif (ampm == 'PM') and (hours > 12):
            hours = hours - 12
    # now, return appropriate tuple
    if len (errors) > 0:
        return ('', errors)
    else:
        return (('%s:%s %s' % (string.zfill(hours,2),
            string.zfill(minutes,2), ampm), None))
def __ScanCamLog(self, fd):
    # Scan a camera log file: for every configured CamLog tag pattern that
    # matches a line, capture the line's timestamp into self.__time,
    # padding any skipped tags, and once a full group of tags has been
    # collected, store it and feed it to the KPI calculation.
    if debugLog >= debugLogLevel[2]:
        print "INFO: begin scan camera log!"
    while 1:
        line = fd.readline()
        if not line:
            if debugLog >= debugLogLevel[2]:
                print "INFO: Finish Parse file!\n"
            break
        if debugLog >= debugLogLevel[-1]:
            print "INFO: Read line is :" + line
        for i in range(0, len(AppLogType.CamLog)):
            # Adapter every key tag
            if debugLog >= debugLogLevel[-1]:
                print "INFO: Camera log-> " + AppLogType.CamLog[i]
            log = re.compile(AppLogType.CamLogPattern[i])
            if debugLog >= debugLogLevel[-1]:
                print "INFO: Scan log-> " + log.pattern
            search = re.search(log, line)
            if search:
                if debugLog >= debugLogLevel[1]:
                    print "INFO: Search Camera log->" + search.group()
                    print "line is: " + line
                # Timestamps look like "MM-DD HH:MM:SS.mmm" (note the
                # unescaped '.' before \d+ matches any character)
                timeFormat = re.compile(r"\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}.\d+")
                if debugLog >= debugLogLevel[2]:
                    print "INFO: TimeFormat-> " + timeFormat.pattern
                time = re.search(timeFormat, line)
                if time:
                    if debugLog >= debugLogLevel[2]:
                        print "INFO: Find key time-> " + time.group()
                    # patch-> cal tag position and write to the right pos
                    if debugLog >= debugLogLevel[1]:
                        tags = search.group()
                        print tags
                        # print 'INFO: pos is '+str(AppLogType.CamLogPos.index(tags))+' time list len is'+str(len(self.__time))
                    # Gap between the tag's expected slot and what we have
                    # collected so far; fill missing slots with this time.
                    # NOTE(review): the padding loop reuses loop variable
                    # `i`, clobbering the outer tag index — confirm this is
                    # benign in practice.
                    pos = AppLogType.CamLogPos.index(search.group()) - len(self.__time)
                    if pos:
                        if debugLog >= debugLogLevel[-1]:
                            print "WARNING: There is " + str(pos) + " data null!!!"
                        for i in range(0, pos):
                            self.__time.append(time.group())
                    self.__time.append(time.group())
                    if debugLog >= debugLogLevel[2]:
                        print "INFO: Time -> " + self.__time[i]
                    ## Save one group
                    if i == len(AppLogType.CamLog) - 1:
                        if debugLog >= debugLogLevel[-1]:
                            print "INFO: Record one goup len num -> " + str(len(AppLogType.CamLog) - 1)
                        if debugLog >= debugLogLevel[1]:
                            print self.__time
                        # Save a group
                        self.__CamLogList.append(self.__time)
                        # calc KPI
                        self.__CalKPI(self.__time)
                        self.__time = []