Ejemplo n.º 1
def parse_dates(dates):
    '''Convert a string of dates into a list of datetime objects.

    Every entry has the form ``month<sep>day<sep>year`` where ``<sep>`` is a
    slash, backslash, dot or dash, optionally followed by whitespace and a
    12-hour time such as ``3:30 PM``.  Anything between entries (commas,
    spaces, ...) is ignored.

    NOTE(review): the previous docstring advertised "%d/%m/%y" (day first),
    but the regular expression has always named the first field ``month``;
    the month-first behavior of the code is kept here.

    Args:
        dates: Text containing zero or more date entries.

    Returns:
        A list of ``datetime.datetime`` objects in order of appearance.
        Entries without a time are set to midnight.

    Raises:
        ValueError: If a matched entry is not a valid calendar date/time.
    '''
    # Splits a single date into its components.
    rx_date = r'(?P<month>\d{1,2})[/\\\.-](?P<day>\d{1,2})[/\\\.-](?P<year>\d{2,4})'
    # Splits a single 12-hour time into its components.
    rx_time = r'(?P<hour>\d{1,2}):(?P<minute>\d{1,2})\s?(?P<segment>[aApP][mM])'
    # Finds every date (with an optional time) in an arbitrarily long string.
    rx_complete = r'(?P<cal_item>(?P<date>\d{1,2}[/\\\.-]\d{1,2}[/\\\.-]\d{2,4})(?:\s+(?P<time>\d{1,2}:\d{1,2}\s?[aApP][mM]))?)'

    result = []
    for item in re.finditer(rx_complete, dates):
        date = re.match(rx_date, item.group('date'))
        year = int(date.group('year'))
        month = int(date.group('month'))
        day = int(date.group('day'))

        hour = minute = 0
        if item.group('time'):
            time = re.match(rx_time, item.group('time'))
            # "% 12" maps 12 AM to 0 and lets 12 PM become 12 (not 24) below.
            hour = int(time.group('hour')) % 12
            if time.group('segment').lower() == 'pm':
                hour += 12
            minute = int(time.group('minute'))

        result.append(datetime.datetime(year, month, day, hour, minute))
    return result
Ejemplo n.º 2
def rightNow(city):
    """Build a human-readable "current weather" message for a city.

    Looks the city up in ``CITY``, requests the live report from ``URL``
    (formatted with the city code, ``KEY`` and ``'base'``) and formats the
    first entry of the ``lives`` list.

    Args:
        city: City name used as a key into ``CITY``.

    Returns:
        The formatted multi-line message, or ``None`` when the city is
        unknown or the service reports a ``count`` of ``'0'``.
    """
    city_code = CITY.get(city)
    if city_code is None:
        return None

    data = requests.get(URL.format(city_code, KEY, 'base')).json()
    if data['count'] == '0':
        return None

    rData = data['lives'][0]
    # A weather description without an entry in IMAGES used to raise
    # TypeError (str + None); fall back to no icon instead.
    weather = rData['weather'] + IMAGES.get(rData['weather'], '')
    temperature = rData['temperature'] + "℃"
    wind = rData['winddirection'] + "风 " + rData['windpower'] + "级"
    humidity = rData['humidity'] + "%"

    report_time = rData['reporttime']
    stamp = re.search(r"^\d+-(\d+)-(\d+)\s(\d+:\d+)", report_time)
    if stamp:
        time = stamp.group(1) + "月" + stamp.group(2) + "日 " + stamp.group(3) + "实况"
    else:
        # Unexpected timestamp layout: show it verbatim rather than crash.
        time = report_time

    return "\n".join([
        city,
        time,
        "天气:" + weather,
        "温度:" + temperature,
        "风力:" + wind,
        "湿度:" + humidity,
    ])
Ejemplo n.º 3
def get_stock_value(time):
    # Looks up rows of 'stock_value.csv' by hour/minute derived from a
    # timestamp string and returns their 'Quote' values.
    #
    # NOTE(review): several lines of this function are missing from this
    # excerpt (the list literals opened on the `time_tweet = [` and
    # `time_final = [` lines and the `int(` call on the `time_fin = ...` line
    # are never closed), so the block does not parse as shown.

    # Split the timestamp into seven fields; groups 4-6 are the three
    # colon-separated fields (presumably hour:minute:second -- confirm).
    time = re.search(r'(.*?) (.*?) (.*?) (.*?):(.*?):(.*?) (.*?)$', time,
                     re.M | re.I | re.U)
    name = 'stock_value.csv'
    # First element: group 4 shifted by 6 (presumably a time-zone offset in
    # hours -- confirm).
    time_tweet = [
        int(time.group(4)) + 6,
    # When the shifted hour is past 19 the time is replaced by 13:30:00.
    if time_tweet[0] > 19:
        time_tweet = [13, 30, 00]
    stock = pd.read_csv(name)
    # Seconds since midnight for the tweet time (expression truncated here).
    time_fin = int(time_tweet[0]) * 3600 + int(time_tweet[1]) * 60 + int(
    # Back to [hour, minute, second].
    time_fin1 = [time_fin // 3600, (time_fin % 3600) // 60, (time_fin % 60)]
    # Same instant plus ten minutes, in seconds since midnight.
    time_final1 = int(time_tweet[0]) * 3600 + int(time_tweet[1]) * 60 + int(
        time_tweet[2]) + 10 * 60
    time_final = [
        time_final1 // 3600, (time_final1 % 3600) // 60, (time_final1 % 60)
    # Quotes whose Hour/Minute columns equal the tweet time ...
    stock_value1 = stock.loc[(stock['Hour'] == time_fin1[0]) &
                             (stock['Minute'] == time_fin1[1])]['Quote'].values
    # ... and the time ten minutes later.
    stock_value = stock.loc[(stock['Hour'] == time_final[0]) &
                            (stock['Minute'] == time_final[1])]['Quote'].values
    return stock_value, time_tweet, stock_value1
Ejemplo n.º 4
def parse_schedule(s, pat=re.compile(r"(\d{1,2}):(\d{2})")):
    # Parse a leading "H:MM" / "HH:MM" in `s` into
    # {'hour': int, 'minute': int}.  `pat` is compiled once, when the
    # function is defined, and reused for every call.
    time = pat.match(s)
    if not time:
        # NOTE(review): the next two lines look like the arguments of a
        # logging call whose opening line is missing from this excerpt, so
        # the block does not parse as shown.  `default_schedule` is not
        # defined inside this function.
            "Invalid time format for BACKUP_DELETE_SCHEDULE: %s - using default schedule at %s",
            s, default_schedule)
        # Fall back to the default schedule string.
        time = pat.match(default_schedule)
    return {'hour': int(time.group(1)), 'minute': int(time.group(2))}
Ejemplo n.º 5
 def format_time(self, time):
     """Return ``"<date> HH:MM"`` for a regex match object.

     The date part comes from ``self.format_notice_time(time)``; groups 4
     and 5 of ``time`` supply hour and minute, each left-padded with one
     ``'0'`` when it is a single character.
     """
     prefix = self.format_notice_time(time)
     pieces = [time.group(4), time.group(5)]
     padded = ['0' + piece if len(piece) == 1 else piece for piece in pieces]
     return prefix + ' ' + ':'.join(padded)
Ejemplo n.º 6
def get_kek(dest):
    # Download the KEK dose page and extract a reading plus its timestamp,
    # then return `dest`.
    # NOTE(review): the statement that consumes the extracted values is
    # truncated in this excerpt (its opening line is missing), so the block
    # does not parse as shown.
    url = 'http://rcwww.kek.jp/norm/dose.html'
    html = urllib2.urlopen(url).read()
    # First number that follows a <b> tag.
    value = re.search(r'<b>\s*([\d.]+)', html).group(1)
    # Timestamp of the form "(YYYY-M-D H:M".
    time = re.search(r'\((?P<year>\d{4})-(?P<mon>\d{1,2})-(?P<day>\d{1,2}) '
            '(?P<hour>\d{1,2}):(?P<min>\d{1,2})', html)
    # Remainder of a call whose first line is not visible here; it receives
    # the timestamp fields as ints, the constant 3 and the reading as float.
        int(time.group('mon')), int(time.group('day')),
        int(time.group('hour')), int(time.group('min'))), 3, float(value))
    return dest
Ejemplo n.º 7
def get_kek(dest):
    # Download the KEK dose page and extract a reading plus its timestamp,
    # then return `dest`.
    # NOTE(review): the line that opens the call using the datetime below is
    # missing from this excerpt, so the block does not parse as shown.
    url = 'http://rcwww.kek.jp/norm/dose.html'
    html = urllib2.urlopen(url).read()
    # First number that follows a <b> tag.
    value = re.search(r'<b>\s*([\d.]+)', html).group(1)
    # Timestamp of the form "(YYYY-M-D H:M".
    time = re.search(
        r'\((?P<year>\d{4})-(?P<mon>\d{1,2})-(?P<day>\d{1,2}) '
        '(?P<hour>\d{1,2}):(?P<min>\d{1,2})', html)
    # Arguments of the truncated call: the parsed timestamp, the constant 3
    # and the reading converted to float.
        datetime.datetime(int(time.group('year')), int(time.group('mon')),
                          int(time.group('day')), int(time.group('hour')),
                          int(time.group('min'))), 3, float(value))
    return dest
Ejemplo n.º 8
    def __CheckFlows(self, line, Flows):
        # Check one log line against every pattern configured in `Flows`
        # (a mapping of flow name -> pattern list joined by
        # ConfigFileSplitSym) and record flow changes.
        # NOTE(review): Python 2 code (print statements).  The
        # `self.__CameraFlows.append(` call below is not closed in this
        # excerpt, so the block does not parse as shown.
        if debugLog >= debugLogLevel[-1]:
            print 'CheckFlow: ' + line, Flows

        for key, values in Flows.items():
            for i in values.split(ConfigFileSplitSym):
                pattern = re.compile(i)

                m = re.search(pattern, line)
                if m:
                    if debugLog >= debugLogLevel[2]:
                        print 'Find Flows: ' + i + '\n' + line

                    # Timestamp expected at the very start of the line,
                    # shaped like "MM-DD HH:MM:SS".
                    timeFormat = re.compile(r'\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}')
                    time = re.match(timeFormat, line)

                    # Only act when the matched flow differs from the
                    # current one.
                    if self.__CameraFlowStep != key:
                        if debugLog >= debugLogLevel[2]:
                            print 'Change Flow ' + key + ' to ' + self.__CameraFlowStep

                        self.__FlowsNum += 1
                        # NOTE(review): `time` is None when the line has no
                        # leading timestamp; `time.group()` would then raise.
                        self.__CameraFlows.append(time.group() + ' ' + key +

                        self.__CameraFlowStep = key


                    if debugLog >= debugLogLevel[2]:
                        print 'Not Finish ' + key + '\n'
Ejemplo n.º 9
    def match(self, str):
        """Compare the hour/minute of a log line's timestamp with ``self.timer``.

        Example line::

            [24/Oct/2016:00:02:40 -0700] 0.000 https .com "GET /staticx/udemy/css/fancybox_overlay.png HTTP/1.1" 404 162

        The ``2016:00:02:40`` part of the timestamp is located, its hour and
        minute fields are concatenated (``"00"`` + ``"02"`` -> ``"0002"``),
        converted to an int and handed to ``self.statement`` together with
        ``self.timer`` (e.g. 02:40 vs 01:34 is compared as 240 vs 134).

        Args:
            str: A single log line.

        Returns:
            The result of ``self.statement(itime, self.timer)``, or ``None``
            when the line contains no timestamp.
        """
        time = re.search(r'[0-9]{4}:[0-9]{2}:[0-9]{2}:[0-9]{2}', str)
        if time is None:
            return None

        _year, hour, minute, _second = time.group().split(':')
        return self.statement(int(hour + minute), self.timer)
    def run(self):
        # Start an iperf/iperf3 load, run ping against self.target through
        # self.interface, and scan the ping output for delayed or
        # unreachable replies.  Returns 1 on a problem and 0 otherwise (or
        # the iperf return code, see below).
        # NOTE(review): both `.format(` calls below are unclosed and no
        # `else:` separates them in this excerpt, so the block does not
        # parse as shown.
        if self.iperf3:
            iperf_cmd = 'timeout -k 1 320 iperf3 -c {} -t 300'.format(
            iperf_cmd = 'timeout -k 1 320 iperf -c {} -t 300'.format(
        print("Running iperf...")
        iperf = subprocess.Popen(shlex.split(iperf_cmd))

        ping_cmd = 'ping -I {} {}'.format(self.interface, self.target)
        ping = subprocess.Popen(shlex.split(ping_cmd), stdout=subprocess.PIPE)

        # Blocks until ping exits and collects its stdout.
        (out, err) = ping.communicate()

        # NOTE(review): nothing in this block calls wait()/poll() on `iperf`
        # before reading returncode; Popen.returncode is None until then, so
        # this comparison may always be true -- confirm.
        if iperf.returncode != 0:
            return iperf.returncode

        print("Running ping test...")
        result = 0
        # Digits that directly follow "time=" (the integer part only).
        time_re = re.compile('(?<=time=)[0-9]*')
        for line in out.decode().split('\n'):
            time = time_re.search(line)

            if time and int(time.group()) > 2000:
                print("ICMP packet was delayed by > 2000 ms.")
                result = 1
            if 'unreachable' in line.lower():
                result = 1

        return result
Ejemplo n.º 11
    def handle_data(self, data):
        """Collect title, time and body text of a news page from parser data.

        Whitespace-only chunks are ignored.  Depending on the parser state
        flags (``iftitle``, ``iftime``, ``iftext``, ``ifFirst``) and the
        last opened tag, the chunk is stored on ``self.news``.

        Args:
            data: Text chunk delivered by the HTML parser.
        """
        if not data.strip():  # skip whitespace-only chunks
            return

        if self.lasttag == 'h1' and self.iftitle and self.ifFirst:  # title
            self.news.title = data

        if self.iftime and self.ifFirst:  # publication time
            time = re.search(re_time, data)
            if time:
                self.news.time = time.group()

        if self.lasttag == 'p' and self.iftext:  # body text
            pageinfo = re.search(re_page, data)
            if pageinfo:
                if self.ifFirst:  # on the first page, read the page count
                    num = re.search(re_pagenum, pageinfo.group())
                    self.pageNum = len(num.group())
                # The first chunk used to be stored and then appended to
                # itself; start the text once, append afterwards.
                # NOTE(review): text is only collected when `re_page`
                # matches, as in the code as found -- confirm this nesting.
                if self.news.text is None:
                    self.news.text = data
                else:
                    self.news.text += data
Ejemplo n.º 12
def Movie(movieName):
	"""Query imdbapi.com for a title and add the findings to ``movieInfo``.

	Each field found in the raw response is inserted into the module-level
	``movieInfo`` list at a fixed position (title 0, year 1, runtime 2,
	rating 3, error text 4).  Returns ``movieInfo``.
	"""
	jar = cookielib.CookieJar()
	opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
	page = opener.open(urllib2.Request("http://www.imdbapi.com/?t=" + movieName))

	# The whole response is handled as one big string.
	rawdata = page.read()

	# (insert position, pattern, label placed in front of the value)
	fields = (
		(0, '"Title":"(.+?)"', "Title: "),
		(1, '"Year":"(.+?)"', "Year: "),
		(2, '"Runtime":"(.+?)"', "Runtime: "),
		(3, '"imdbRating":"(.+?)"', "Rating: "),
		(4, '"Error":"(.+?)"', ""),
	)
	for position, pattern, label in fields:
		found = re.search(pattern, rawdata)
		if found:
			movieInfo.insert(position, label + found.group(1))

	return movieInfo
Ejemplo n.º 14
def parseAvgTime(line):
    """Return the time found in ``line`` as a float, or ``None``.

    The first ``TIME_REGEX`` match is split with ``TIME_REGEX_SPLIT``; the
    second piece is converted to float when the split yields exactly two
    pieces.  Every other case gives ``None``.
    """
    found = re.search(TIME_REGEX, line)
    if found is None:
        return None
    pieces = TIME_REGEX_SPLIT.split(found.group(0))
    return float(pieces[1]) if len(pieces) == 2 else None
Ejemplo n.º 16
def latencia(host=''):
    """Measure the latency to ``host`` with a single ping.

    This method tests the QoS of the link with respect to latency.

    Args:
        host: Host name or address handed to ``ping``.

    Returns:
        The ``time=`` value reported by ping as a float, or ``0.0`` when the
        output contains no such value.
    """
    # The non-Linux command used to sit after a `return` inside the Linux
    # branch and was unreachable (the function returned None there).
    if platform.system() == "Linux":
        command = "ping -c1 %s " % host
    else:
        # NOTE(review): "> nul" discards the command output, so nothing can
        # be parsed from it; the command text is kept as it was -- confirm.
        command = "ping -c1 -w2 %s > nul" % host
    status, result = sp.getstatusoutput(command)

    # Accept integer values as well ("time=123 ms"), which the previous
    # pattern rejected because it required a decimal point.
    time = re.search(r'time=([0-9]+(?:\.[0-9]+)?)', result)
    if time:
        return float(time.group(1))
    return 0.0
Ejemplo n.º 17
	def write(self, msg):
		"""Store ``msg`` in the ``Unit`` table when it has a timestamp and a tag.

		The timestamp (``MM-DD HH:MM:SS.ffffff``) is parsed into a datetime;
		the row written is ``(datetime, tag text, msg)``.  Messages lacking
		either part are ignored.
		"""
		stamp = re.search(r'\d+-\d+\s+\d+:\d+:\d+\.\d+', msg)
		label = re.search(r'\w\s\w+\s*:\s+|\[\d+: .*\]', msg)
		if not (stamp and label):
			return

		when = datetime.datetime.strptime(stamp.group(), '%m-%d %H:%M:%S.%f')
		row = (when, label.group(0), msg)
		self.csr.execute('INSERT INTO Unit VALUES (?,?,?)', row)
Ejemplo n.º 18
def get_time2(times):
    """Return how close "now" is to the nearest of the given clock times.

    Each entry of ``times`` starts with ``H:M``.  The distance between the
    current moment and that clock time on the previous, current and next
    day is measured; the smallest distance over all entries, capped at
    twelve hours, is scaled to the range 0.0 .. 1.0.

    Args:
        times: Iterable of strings beginning with ``hours:minutes``.

    Returns:
        ``seconds_to_nearest_time / 43200`` as a float; ``1.0`` for an
        empty iterable.

    Raises:
        AttributeError: If an entry does not start with ``digits:digits``.
        ValueError: If hour or minute is out of range.
    """
    # Function-scope import: the file's import block is not visible here.
    from datetime import timedelta

    tfactor = 1.0
    half_day = 43200.0
    one_day = timedelta(days=1)
    curr = datetime.now()
    feat = half_day
    for t in times:
        time = re.match(r'(\d+):(\d+)', t)
        today_at = datetime(curr.year, curr.month, curr.day,
                            int(time.group(1)), int(time.group(2)))
        # Shifting by a timedelta instead of building day-1 / day+1 directly
        # avoids ValueError on the first and last day of a month.
        closest = min(abs(candidate - curr)
                      for candidate in (today_at - one_day, today_at,
                                        today_at + one_day))
        if closest.seconds < feat:
            feat = closest.seconds

    return feat / half_day / tfactor
Ejemplo n.º 19
def timeParamIdx(input: str) -> int:
    """Return the millisecond value of the first ``time=<n>ms`` field.

    Args:
        input: Text to scan, e.g. one line of ping output.

    Returns:
        The captured number as an ``int``, or ``-1`` when ``input`` has no
        ``time=<digits>ms`` field.
    """
    # \d+ (not \d*) so that "time=ms" counts as "not found" instead of
    # yielding an empty string.
    time = re.search(r"time=(\d+)ms", input)
    if time is None:
        return -1
    # Convert so the value matches the declared return type.
    return int(time.group(1))
Ejemplo n.º 20
    def get_dt(self, fn):
        """Extract the date, time and combined stamp from a file name.

        Expects a name containing ``_<date>_<time>.`` and returns the tuple
        ``(date incl. leading underscore, time, "<date>_<time>")``.  When any
        of the three parts is missing the method returns ``None``.
        """
        patterns = (
            r'_\d{1,8}(?=_\d{1,4}\.)',   # date, with its leading underscore
            r'(?<=\d_)\d{4}(?=\.)',      # four-digit time just before the dot
            r'\d{1,8}_\d{1,4}(?=\.)',    # date and time together
        )
        hits = [re.compile(pattern).search(fn) for pattern in patterns]
        if all(hits):
            return tuple(hit.group() for hit in hits)
Ejemplo n.º 21
def text_data(text_lines):
	"""Pull name, time, date, trial length and measure rate out of a header.

	Line index 2 supplies the name (``Roman_...`` or, failing that,
	``Accuracy...``), the time and the date; index 3 supplies the measure
	rate and index 4 the trial length.

	Returns:
		``(name, time, date, trial_len, measure_rate)`` -- four strings and
		a float.
	"""
	measure_rate = float(re.search(r'\d+.\d+', text_lines[3]).group())

	header = text_lines[2]
	name = re.search(r'Roman_\w+', header) or re.search(r'Accuracy\w+', header)
	clock = re.search(r'\d+:\d+:\d+', header)
	day = re.search(r'\d+-\d+-\d+', header)
	trial_len = re.search(r'\d+\.\d+', text_lines[4])

	fields = [found.group() for found in (name, clock, day, trial_len)]
	return tuple(fields) + (measure_rate,)
Ejemplo n.º 22
def analyze(read):
    """Split a ``"<topic> - HH:MM\\n"`` line into topic and time.

    Args:
        read: The text that was read.

    Returns:
        ``(topic + "\\n", "HH:MM")`` when ``read`` starts with a line of the
        form ``<topic> - HH:MM`` followed by a newline (``topic`` keeps its
        trailing space); otherwise ``(read, "OOPS")``.
    """
    # One pattern captures both parts, so the returned time is always the
    # one that made the line qualify (previously the first HH:MM anywhere in
    # the text was used, and the success return was unreachable).
    timed = re.match('(.* )- ([0-2][0-9]:[0-5][0-9])\n', read)
    if timed is None:
        return read, "OOPS"

    # Parenthesised single argument: valid in both Python 2 and Python 3.
    print("Timed: " + read)
    return timed.group(1) + "\n", timed.group(2)
Ejemplo n.º 23
    def parse_time_detail(self, time):
        """Return the minute shown in a ``timeCol`` table cell.

        Args:
            time: HTML fragment expected to contain
                ``<td class="timeCol">NN'</td>``.

        Returns:
            The minute digits as a string; ``None`` when the cell is present
            but empty; ``""`` when no such cell is found.
        """
        found = re.search(
            "<td class=\"timeCol\">((?P<time>[\\d]{1,3})')?</td>", time)
        # An explicit None check replaces the former AttributeError handler
        # (whose `try:` was missing) and a type test that could never fail.
        if found is None:
            return ""
        return found.group('time')
Ejemplo n.º 24
def getDates(text):
	"""List the scheduled bridge openings found in ``text``.

	Only the sentence between "The Fore River Bridge is scheduled to open
	at" and "to allow" is examined.  Every time that appears before a date
	(and after the previous date) is paired with that date.

	Returns:
		A list of ``"<time> <date>"`` strings; empty when the sentence is
		not present.
	"""
	time_pattern = re.compile(r'(\d{1,2} a.m.?|\d{1,2} p.m.?|\d{1,2}:\d{2} a.m.?|\d{1,2}:\d{2} p.m.?)')
	dates = []

	match = re.search('The Fore River Bridge is scheduled to open at(.+?)to allow', text)
	if match is None:
		return dates

	sentence = match.group(1)
	cursor = 0  # where the search for times resumes
	for date in re.finditer(r'(\w+, \w+\.* [0-9]{1,2})', sentence):
		begin, finish = date.span()
		dates.extend(
			time.group(1) + " " + date.group(1)
			for time in time_pattern.finditer(sentence, pos=cursor, endpos=begin)
		)
		cursor = finish
	return dates
Ejemplo n.º 25
    def __do_analysis(self, event):
        # Event handler: reads the object stored in the file named by
        # self.fileText, scans its lines for timing entries of self.cmd and
        # hands the collected series to self.draw().
        # NOTE(review): this excerpt is heavily truncated (docstring quotes,
        # the bodies of several `if` statements and the statements that fill
        # x1/y1/x2/y2 are missing), so the block does not parse as shown.
        :param event:
        if self.fileText.GetValue() == '':

        if self.cmd not in comands.cmds:
        output = FileTool.readObject(self.fileText.GetValue())
        # Series 1: "post request cost" samples.
        x1 = []
        y1 = []
        statisticInfo1 = StatisticInfo('post request cost:')
        # Series 2: "sum cost" samples.
        x2 = []
        y2 = []
        statisticInfo2 = StatisticInfo('sum cost')
        # p1/p2 are the textual prefixes; pattern1/pattern2 add the digits
        # that follow them.
        p1 = 'post_request_cost:.*:{}:'.format(self.cmd)
        pattern1 = '{}\d+'.format(p1)
        p2 = '耗时:{}:'.format(self.cmd)
        pattern2 = '{}\d+'.format(p2)
        # Leading "[HH:MM:SS:mmm]" stamp.
        time_pattern = '^\\[\d{2}:\d{2}:\d{2}:\d{3}\\]'
        for line in output:
            time = re.match(time_pattern, line)
            if time is None:
            # Strip the square brackets from the stamp.
            timestr = re.sub('[\\[\\]]', '', time.group(0))
            mlis = self.format_date(timestr)

            st = re.search(pattern1, line)
            if st is not None:
                # Remove the prefix, leaving only the digits.
                t = re.sub(p1, '', st.group(0))

            st1 = re.search(pattern2, line)
            if st1 is not None:
                t = re.sub(p2, '', st1.group(0))



        self.draw(x1, y1, x2, y2, self.cmd)
Ejemplo n.º 26
    async def log(self,
                  time: str,
                  amount: int = 10,
                  channel: discord.Channel = None):
        # Look up logged messages newer than a given age and send them back
        # in chunks.
        # NOTE(review): this excerpt is truncated -- the `timedelta(` call is
        # not closed and the statement that runs the SQL query has lost its
        # opening line -- so the block does not parse as shown.  `ctx`,
        # `reg` and `cursor` are not defined inside this block.
        time = reg.match(time)
        if channel == None:
            channel = ctx.message.channel

        def none0(val):
            # Map None to 0, then convert to int.
            return int((val, 0)[val == None])

        datedif = datetime.now() - timedelta(days=none0(time.group(2)),
        ##await self.bot.say("Time: `{}d{}h{}m{}s` Amt: `{}` Ch: `{}`".format(time.group(2), time.group(4), time.group(6), time.group(8), amount, (channel, "all")[channel == None]))
        ##status = await self.bot.say("Searching logs for messages from all users in {}.".format((channel.mention, "all channels")[channel == "*"]))

        # NOTE(review): the query text is assembled with str.format from
        # channel.id, datedif and amount rather than bound parameters --
        # confirm these values cannot be influenced by untrusted input.
            "SELECT * FROM `{}` WHERE `timestamp` >= '{}' ORDER BY `id` ASC LIMIT {}"
            .format(channel.id, datedif, amount))
        data = cursor.fetchall()
        msg = "Done! Found {} messages: ```".format(len(data))

        m = 0
        for entry in data:
            user = discord.utils.get(self.bot.get_all_members(), id=entry[1])
            m += 1
            msg += "\n{} #{} @{} >> \"{}\"".format(entry[4], entry[0],
                                                   entry[1], entry[3])
        if len(msg) < 1998:
            msg += "```"
        # Send the text in slices; each follow-up slice is re-wrapped in a
        # code fence.
        while len(msg) > 6:
            await self.bot.send_message(
                msg[0:2000] + ("```", "")[len(msg) < 1998])
            msg = "```{}```".format(msg[1994:])
Ejemplo n.º 27
def parseTweet(tweet):
    """Extract song, artist, featured artist and play time from a tweet.

    The tweet text is expected to follow the @tripleJplay layout, roughly
    ``<artist> - <song> [HH:MM]``.  Parts that cannot be found are replaced
    by a placeholder string.

    Side effect: ``tweet.created_at`` is replaced by its UTC-localized
    value.

    Args:
        tweet: Object with ``text``, ``created_at`` (naive UTC datetime)
            and ``id`` attributes.

    Returns:
        A dict with keys ``song``, ``artist``, ``feat``, ``time`` (Sydney
        datetime carrying the hour/minute printed in the tweet, when
        present), ``raw``, ``created_at`` and ``id``.
    """
    # https://regex101.com/#python
    # Obviously we are just putting faith in the formatting of @tripleJplay
    songRegex = re.compile(r"(?<=-\W).+?(?=\W\[)")
    artistWithTwitterRegex = re.compile(r'(?<=@).+?(?=\W)')
    aristWithoutTwitterRegex = re.compile(r'.+(?=\W-)')
    featureArtistTwitterRegex = re.compile(r'(?<=ft.\W\@).+?(?=\W)')
    timeRegex = re.compile(r'(?<=\[)[0-9]{2}:[0-9]{2}(?=\])')  # THIS IS VERY SPECIFIC

    text = tweet.text

    # Each placeholder is used only when the search fails; previously the
    # placeholder assignment always ran and overwrote the value found.
    song = songRegex.search(text)
    song = song.group() if song else "^Song Not Found!^"

    artist = artistWithTwitterRegex.search(text)
    if artist:
        artist = artist.group()
    else:
        artistNoTwitter = aristWithoutTwitterRegex.search(text)
        if artistNoTwitter:
            artist = artistNoTwitter.group()
        else:
            artist = "!Artist Not Found!"

    feat = featureArtistTwitterRegex.search(text)
    feat = feat.group() if feat else '*Feature Artist Not Found*'

    time = timeRegex.search(text)

    # use pytz to put the time in sydney time
    sydney = timezone('Australia/Sydney')
    tweet.created_at = timezone('UTC').localize(tweet.created_at)  # attach UTC to the tweet time
    date_object = tweet.created_at.astimezone(sydney)  # convert to Sydney time
    if time:
        # Take hour and minute from the tweet text itself.  Without a
        # "[HH:MM]" tag the converted creation time is kept as it is.
        hour, minute = time.group().split(':')
        date_object = date_object.replace(hour=int(hour), minute=int(minute))

    return {'song': song, 'artist': artist, 'feat': feat, 'time': date_object, 'raw': tweet.text,
            'created_at': tweet.created_at, 'id': tweet.id}
Ejemplo n.º 28
    def CheckLogs(self, Flows, ErrLogs, KeyWords):
        if debugLog >= debugLogLevel[-1]:
            print 'CheckLogs: ', self.__filename

        while 1:
            line = self.__fd.readline()

            if not line:
                if debugLog >= debugLogLevel[2]:
                    if self.__endTime != '':
                        print 'End Time:', self.__endTime
                    print '(INFO) Finish Parse file: ' + self.__filename
                    print '(INFO) lines: ', self.__logLines, '\n'

            timeFormat = re.compile(r'\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}')
            time = re.match(timeFormat, line)

            if time and self.__logLines == 0:
                self.__beginTime = time.group()
                if debugLog >= debugLogLevel[2]:
                    print 'Begin Time:', self.__beginTime
                if time:
                    self.__endTime = time.group()

                if debugLog >= debugLogLevel[-1]:
                    print 'INFO: Read line --->' + line

            self.__CheckFlows(line, Flows)

            self.__CheckErrLogs(line, ErrLogs)

            self.__CheckKeyWords(line, KeyWords)

            self.__logLines += 1
Ejemplo n.º 29
    def get_trtime_position(self, grab):
        # Read date/time information from the last table row of the document
        # held by `grab`.
        # NOTE(review): Python 2 code (print statements).  The excerpt is
        # truncated: the indented statements below have lost the line that
        # opened their block and nothing is returned, so the block does not
        # parse as shown.
        text = ''
        time = ''
        dt = ''
#        [last()]
#        grab.doc.select('//th'):
            #print grab.doc.select('//tr[last()]').select('./td[1][.//b]').html()
            # Bold text of the first cell, parsed as "DD.MM.YYYY HH:MM".
            text = grab.doc.select('//tr[last()]').select('./td[1]/b').text()
            dt = datetime.strptime(text, "%d.%m.%Y %H:%M")
            # Full text of the same cell; look for an "HH:MM" inside it.
            text = grab.doc.select('//tr[last()]').select('./td[1]').text()
            time = re.search("(\d{2}:\d{2})", text)
            if time:
                print text, time.group().strip().lower()
Ejemplo n.º 30
def getPost(filename):
    """Read the message, time and sender id from a post file.

    The file is scanned for lines starting with ``message: ``, ``time: ``
    (up to the last ``+`` on the line) and ``from: z<digits>``.  When a
    field occurs more than once the last occurrence wins.

    Args:
        filename: Path of the UTF-8 encoded post file.

    Returns:
        ``[message, time, sender_id]`` with the ``T`` in the time replaced
        by a space, or ``None`` when any of the three fields is missing.
    """
    mes = None
    ti = None
    frm_zid = None
    # The context manager closes the file; it used to be left open.
    with open(filename, 'r', encoding='utf-8') as handle:
        for line in handle:
            message = re.match(r"message:\s(.+)", line)
            time = re.match(r"time:\s(.+)\+", line)
            frm = re.match(r"from:\sz(\d+)", line)
            if message:
                mes = message.group(1)
            if time:
                ti = time.group(1).replace('T', ' ')
            if frm:
                frm_zid = frm.group(1)
    if mes and ti and frm_zid:
        return [mes, ti, frm_zid]
    return None
Ejemplo n.º 31
    def create_stats():
        """Benchmark every project version, sort algorithm, file and split.

        For each combination the benchmark command is run, the CPU
        percentage and elapsed wall-clock time are parsed from its output,
        and one CSV row is appended to
        ``HOME + "stats/log/Project_<name>.csv"``.

        Row layout: "sort_algo","entry_lines","split","time(sec)","cpu(%)".
        ``time`` is ``"err"`` when it could not be parsed and ``"inf"`` when
        it reached ``TIMEOUT``; ``cpu`` is ``"err"`` when it could not be
        parsed.
        """
        logger = create_logger()

        # "sort_algo","entry_lines","split","time(sec)","cpu(%)"
        line = '"%s",%d,%d,%s,%s\r\n'
        cpu_re = re.compile(
            r"[^']*Percent of CPU this job got: (?P<cpu>[0-9]{1,4})%[^']*")
        time_re = re.compile(
            r"[^']*Elapsed \(wall clock\) time \(h:mm:ss or m:ss\): (?P<time>[0-9]?:[0-9]{2}\.[0-9]{2})[^']*")

        # Launch stats
        for p in Project:  # each project version
            # The original opened this file here and never used or closed
            # the handle; the open is kept (it creates the file) but the
            # handle is released immediately.
            # NOTE(review): rows are written to stats/log/, not to this
            # file -- confirm which path is intended.
            open(HOME+"stats/Project_"+p.name+".csv", "a+").close()
            for alg in SortAlgo:  # each sort algo
                # set algo active
                SortAlgo.set(alg, Project.__file__%(p.value))
                logger.info("SortAlog set %s in %s"%( alg.name, Project.__file__%(p.value) ))
                # compile
                call(MAKE, logger=logger, shell=True)
                for f in Files:  # for each file size
                    for i in range(1,11):  # 1 to 10 split
                        tmp = f.value[0].split(".")
                        sorted_file = ".".join(tmp[:-1] + ["sort"] + tmp[-1:])

                        command = CMD%(HOME, Project.__m_option__%(p.value), f.value[0], sorted_file, i)
                        outs, errs = call(command, logger=logger, is_stats=True, shell=True)

                        # write results of this test
                        res = (outs + errs).decode()

                        cpu = cpu_re.match(res)
                        time = time_re.match(res)

                        if time is not None:
                            time = time.group(1)
                            tmp_time = int(time.split(":")[0]) *60  # minutes
                            time = tmp_time + float(time.split(":")[1])  # seconds

                        cpu = int(cpu.group(1)) if cpu is not None else "err"
                        if time is None:
                            time = "err"
                        elif time >= TIMEOUT:
                            time = "inf"

                        # One handle per row, closed right after the write
                        # (previously a new handle was leaked per iteration).
                        with open(HOME+"stats/log/Project_"+p.name+".csv", "a+") as stats:
                            stats.write(line % (alg.name, f.value[1], i, time, cpu))
Ejemplo n.º 32
    def customerParseItem(self, response, item, **kwargs):
        """Fill ``source`` and ``publish_time`` of ``item`` from the page.

        ``publish_time`` and ``author`` are reset to ``''`` first.  The
        source is the first space-separated word of the link text inside the
        ``info`` element; the publish time is the last ``YYYY年MM月DD日`` date
        found in that element's text nodes.

        Args:
            response: Response handed to ``Selector``.
            item: Mapping that receives the extracted fields.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            The same ``item``.
        """
        sel = Selector(response=response)
        item["publish_time"] = ''
        item['author'] = ''
        text0 = sel.xpath('//*[@class="info"]/i/a/text()').extract_first()
        if text0:
            item['source'] = text0.split(" ")[0]

        date_pattern = re.compile(r'\d{4}年\d{2}月\d{2}日')
        for fragment in sel.xpath('//*[@class="info"]/i/text()').extract():
            if "年" not in fragment:
                continue
            found = date_pattern.search(fragment)
            # Text that contains "年" without being a full date used to raise
            # AttributeError on None; such text is skipped now.
            if found:
                item['publish_time'] = found.group()

        return item
Ejemplo n.º 33
def standard_work_article(target_url):
    # Download an article page and return (publish time, "<title><replace
    # title><body>") where the body is '\n'.join(return_data).
    # NOTE(review): this excerpt is truncated (several `try:` lines, closing
    # parentheses and the statements that fill `return_data` are missing),
    # so the block does not parse as shown.
    return_data = []
    headers = {'User-Agent': UserAgent}
    res = requests.get(target_url, headers=headers)
    reg_content = res.content.decode('utf8')
    html_page = bs4.BeautifulSoup(reg_content, 'lxml')
    infos = ''
        # Paragraphs are taken from the element with class "content"; the
        # handler below falls back to the element with id "container_txt".
        infos = html_page.find(class_='content').findAll('p')
    except AttributeError:
        infos = html_page.find(id='container_txt').findAll('p')
    for one_info in infos:
        content_dir = re.search('<p[\\s\\S]+/p>', str(one_info))
        if content_dir:
            need_content = one_info.text
            if isinstance(one_info, bs4.NavigableString):
                need_content = one_info
        if not need_content.strip():
        # Publish time: first from the <h2> text ...
        time = html_page.find('h2').text
        datetime_dir = re.match(
            '.+:(?P<year>\d{4})-(?P<month>\d+?)-(?P<day>\d+?) (?P<hour>\d+?):(?P<minute>\d+?):.+',
    except AttributeError:
        # ... otherwise from the element with id "pubtime_baidu".
        time = html_page.find(id='pubtime_baidu').text.strip()
        time = re.search(r"(\d{4}-\d{1,2}-\d{1,2}\s\d{1,2}:\d{1,2}:\d{1,2})",
        time = time.group(0)
        datetime_dir = re.match(
            '(?P<year>\d{4})-(?P<month>\d+?)-(?P<day>\d+?) (?P<hour>\d+?):(?P<minute>\d+?):\d+',
    # NOTE(review): `datetime_dir` is indexed here before the `if
    # datetime_dir:` check further down -- confirm it cannot be None.
    tt_tmp = '%s-%s-%s %s:%s' % (datetime_dir['year'], datetime_dir['month'],
                                 datetime_dir['day'], datetime_dir['hour'],
    _datetime = 0
    if datetime_dir:
        _datetime = get_time('%Y-%m-%d %H:%M', tt_tmp)
    _title = html_page.find('h1').text
    return _datetime, '%s<replace title>%s' % (_title, '\n'.join(return_data))
Ejemplo n.º 34
    def __init__(self, header, message):
        """Build a record from a ``time:level:file:line`` header and a message.

        The header is stripped of surrounding whitespace and of leading
        ``-``, ``!`` and ``>`` characters, then split from the right on at
        most three colons.  The first piece is matched against
        ``self._timepat``; ``'-'`` is stored when it does not match.
        Missing trailing fields are filled with ``''``.
        """
        header = header.strip().lstrip('-!>')
        self.inp = (header, message)

        fields = header.rsplit(':', 3)
        stamp = self._timepat.match(fields.pop(0))
        time = stamp.group() if stamp else '-'

        # Pad one or two remaining fields up to three.
        for short_length in (1, 2):
            if len(fields) == short_length:
                fields.append('')

        level, file_, line = fields[:3]

        self.update(time=time, level=level, file=file_, line=line)
        self['message'] = message
Ejemplo n.º 35
 def get_status(self,user_num):
     # Page through a user's status list and return the concatenated raw
     # responses.
     # NOTE(review): the `while` loop below is indented deeper than the
     # statements before it (a `try:` line appears to be missing from this
     # excerpt), so the block does not parse as shown.  `cmp` exists in
     # Python 2 only.
     page = 0
     result = "none"  # accumulates the search result
     time_form = re.compile("dtime\":\".*?\s")
     required_time = "2015-05-29"  # the date the search should go back to
     search = 1  # whether to keep searching
         while search:
             more_result = self.opener.open("http://status.renren.com/GetSomeomeDoingList.do?userId=" + user_num + "&curpage=" + str(page) + "&_jcb=jQuery1111031641900399699807_1433395719580&requestToken=379990670&_rtk=e26730e8&_=1433395719581")
             result = result + more_result.read()
             # NOTE(review): this searches the response object returned by
             # open(), not the text that was read from it -- confirm.
             time = time_form.search(more_result)
             if (cmp(time.group(0)[8:],required_time) == -1) or (page > 100):  # no matching statuses before this point: stop searching
                 search = 0
             page = page + 1
         #fp = open("user/" + user_num + ".txt",'w')
         return result
Ejemplo n.º 36
def process_time_step(message):
        # Bot step handler: reads a time from the user's message, stores it
        # on the pending event for this chat and asks for a link next.
        # NOTE(review): this excerpt is truncated (the `try:` that pairs
        # with the `except` below and the bodies/returns of some `if`
        # statements are missing), so the block does not parse as shown.
        time = message.text
        if time == 'Отмена':
        chat_id = message.chat.id
        event = event_dict[chat_id]
        # Two digits, one separator character, two digits.
        # NOTE(review): inside a character class `|` is a literal and `:|/`
        # forms a range from ':' to '/', which is reversed -- confirm that
        # this pattern compiles.
        time = re.search(r'\d{2}[:|/|-|.| ]\d{2}', time)
        if time == None:
            msg = bot.reply_to(message, 'Введите время в формате (ЧЧ:ММ)')
            bot.register_next_step_handler(msg, process_date_step)

        # Normalise whichever separator was used to ':'.
        event.time = re.sub(r'[:|/|-|.| ]', ':', time.group())
        markup = types.ReplyKeyboardMarkup(one_time_keyboard=True, resize_keyboard=True)
        markup.add('Нет ссылки', 'Отмена')
        msg = bot.reply_to(message, 'Введите ссылку', reply_markup=markup)
        bot.register_next_step_handler(msg, add_new_event_url)

    except Exception as e:
        print("Exception: " + str(e))
        bot.reply_to(message, 'Введите время в формате (ЧЧ/ММ)')
Ejemplo n.º 37
    def writeComment(self, dataList):
        # Write the vote count, content and time of every entry in
        # `dataList` to self.fp.
        # NOTE(review): the final `re.search(` call appears to have lost its
        # pattern argument in this excerpt (it now receives the entry text
        # and the re.S flag) -- confirm against the original source.
        for list in dataList:
            list = str(list)
            soup = BeautifulSoup(list)
            # Get the total number of up-votes for this comment or answer
            pzanCount = str(soup.find('span', class_='count'))
            # Get the content of the comment or answer
            pcontent = str(soup.find('div', class_='zm-editable-content'))
            # Get the time the comment or answer was posted
            ptime = str(soup.find('a', class_='answer-date-link'))
            # Inner text of each element found above.
            zanCount = re.search('<span.*?>(.*?)</span>', pzanCount, re.S)
            content = re.search('<div.*?>(.*?)</div>', pcontent, re.S)
            time = re.search('<a.*?>(.*?)</a>', ptime, re.S)

            self.fp.write("赞同的人数:" + zanCount.group(1) + "\n")
            # Drop any remaining tags from the content.
            contentText = re.sub('<.*?>', '', content.group(1))
            self.fp.write("评论的内容:\t" + contentText + "\n")

            self.fp.write("\t\t" + time.group(1) + "\n\n")

            replayID = re.search(
                list, re.S)
Ejemplo n.º 38
def onHandleTime():
    """Return the timestamp found in the last-login string.

    The text from ``getLastLoginTime()`` is searched for a
    ``YYYY-MM-DD HH:MM:SS``-like stamp and the matched text is returned.
    """
    last_login = getLastLoginTime()
    stamp = re.compile(
        r"\d\d\d\d-(0?[1-9]|1[0-2])-(0?[1-9]|[12][0-9]|3[01]) (00|[0-9]|1[0-9]|2[0-3]):([0-9]|[0-5][0-9]):([0-9]|[0-5][0-9])([0-9]|[0-5][0-9])"
    )
    return stamp.search(last_login).group(0)
Ejemplo n.º 39
        # Fragment of a log-scanning loop: for every log file, lines that
        # match `strm` are searched for a bracketed timestamp, a user id and
        # a mobile number.
        # NOTE(review): Python 2 code.  The enclosing function header, the
        # inner loop statement and several `if` bodies are missing from this
        # excerpt, and the indentation mixes tabs and spaces, so the block
        # does not parse as shown.
        for log in logs:
                print log
		if len(log) < 3:

                fp = open(log)
                        line = fp.readline()
                        if not line:
                        m = re.match(r'.*'+strm+r'.*',line)
                        if m:
				time = re.search(r'\[.*\]',line)
                                if time:
                                        time = time.group(0)
                                        # Strip the surrounding square brackets.
                                        time = re.sub(r'\[|\]','',time)
                                        #sdata = re.split(r':',time)
                                        #date = sdata[0]
                                        #hour = sdata[1]
                                        #min = sdata[2]
                                        #second = re.split(r'\s+',sdata[3])[0]
					time_original = re.split(r' +',time)[0]
                                user = re.search(r'user_id=\d+?&',line)
                                if user:
                                        # Keep only the digits between '=' and '&'.
                                        userid= user.group(0)
                                        userid = re.split(r'=',userid)[1]
                                        userid = re.split(r'&',userid)[0]
                                mobile = re.search(r'mobile=\d+?&',line)
Ejemplo n.º 40
    def isTimeover(self, a, b):
        """Tell whether timestamp ``a`` is later than timestamp ``b``.

        Both arguments have the form ``Y:M:D h:m:s``.

        The previous version read every field of ``b`` from the match for
        ``a``, compared ``month`` with itself, compared the fields as
        strings and returned True as soon as any single field of ``a`` was
        larger, even when a more significant field was smaller.  The fields
        are now compared numerically, most significant first, seconds
        included.

        Args:
            a: Timestamp to test.
            b: Reference timestamp.

        Returns:
            ``True`` when ``a`` is strictly later than ``b``, else ``False``
            (the earlier version returned ``None`` in that case).

        Raises:
            AttributeError: If a timestamp does not have the expected layout.
            ValueError: If a field is not numeric.
        """
        layout = r'(.*?):(.*?):(.*?) (.*?):(.*?):(.*)'

        def fields(stamp):
            # (year, month, day, hour, minute) as ints; seconds as float so
            # that fractional seconds are accepted.
            parts = re.search(layout, stamp).groups()
            return tuple(int(part) for part in parts[:5]) + (float(parts[5]),)

        # Tuple comparison is lexicographic: year first, seconds last.
        return fields(a) > fields(b)
Ejemplo n.º 41
Archivo: wtslib.py Proyecto: mgijax/wts
def parse_Time (tym):
	''' returns a tuple (standardized time string, list of error strings)
	# Requires:	tym - string; a time (HH:MM PM) where the AM/PM
	#		designation is optional
	# Effects:	parses tym and converts it to the standard WTS time
	#		format (HH:MM PM).  Maintains a list of any errors
	#		encountered in parsing.  Returns a tuple with either:
	#		(time string, None) if no errors, or ('', list of error
	#		strings) if errors were found.
	# Modifies:	no side effects
	'''
	# START and END are regex fragments defined elsewhere in this module;
	# they are only read here.
	global START, END

	# we use regular expressions to do our initial examination and
	# splitting of tym into its component pieces.  Set up the regex
	# strings we'll use (old "regex" module syntax: \( \) mark groups):

	re_ampm = '\([AP]M\)'				# AM/PM
	re_hhmm = '\([012 ]?[0-9]\):\([0-5][0-9]\)'	# HH:MM

	# now, compile the regular expressions to look for recognizable tym
	# formats (for description, see above):

	timeampm = regex.compile ('.*' + re_ampm)	# is AM/PM anywhere?
	time = regex.compile (START + re_hhmm + END)	# time without AM/PM
	fulltime = regex.compile (START + re_hhmm +
		'[ \t]*' + re_ampm + END)		# time with AM/PM

	ampm = None		# no AM/PM designation yet
	hours = None		# no hours yet
	minutes = None		# no minutes yet

	# now, try to match tym to one of the formats (one with AM/PM, one
	# without).  The code treats a match() result >= 0 as success.

	if (timeampm.match (tym) >= 0):
		if (fulltime.match (tym) >= 0):
			(hours, minutes, ampm) = fulltime.group (1, 2, 3)
	elif (time.match (tym) >= 0):
		(hours, minutes) = time.group (1, 2)

	# if hours is still None, then we know it didn't match.  bail out.

	if not hours:
		return ('', [ 'Could not recognize time: %s' % tym ])

	# otherwise, we have no errors yet.  So, get integer values for hours
	# and minutes.  Then error-check the hours.  Because of the regular
	# expression matching [0-5][0-9], we know that minutes must be in the
	# correct range (00-59).

	errors = []
	hours = string.atoi (hours)
	minutes = string.atoi (minutes)

	if (hours < 1) or (hours > 23):
		errors.append ('Hours out of range: %d' % hours)

	# if ampm not yet defined, then set the AM/PM designation and adjust
	# the hours if necessary (convert 24-hour clock to 12-hour)

	if not ampm:
		if (hours >= 12):
			ampm = 'PM'
			if (hours > 12):
				hours = hours - 12
		else:
			# without this branch 'AM' overwrote the 'PM' just set
			# above and morning times were left with no designation
			ampm = 'AM'

	# otherwise, check that the designation is correct.  Note that a PM
	# designation with hours > 12 is not necessarily incorrect, but rather
	# just redundant.  (so adjust it to reflect a 12-hour clock)

	else:
		if (ampm == 'AM') and (hours > 12):
			errors.append ('AM designation is incorrect for ' + \
				'%d hours' % hours)
		elif (ampm == 'PM') and (hours > 12):
			hours = hours - 12

	# now, return appropriate tuple

	if len (errors) > 0:
		return ('', errors)
	else:
		return ('%s:%s %s' %
			(string.zfill(hours,2), string.zfill(minutes,2), ampm),
			None)
    def __ScanCamLog(self, fd):
        if debugLog >= debugLogLevel[2]:
            print "INFO: begin scan camera log!"

        while 1:
            line = fd.readline()

            if not line:
                if debugLog >= debugLogLevel[2]:
                    print "INFO: Finish Parse file!\n"

            if debugLog >= debugLogLevel[-1]:
                print "INFO: Read line is :" + line

            for i in range(0, len(AppLogType.CamLog)):  # Adapter every key tag
                if debugLog >= debugLogLevel[-1]:
                    print "INFO: Camera log-> " + AppLogType.CamLog[i]

                log = re.compile(AppLogType.CamLogPattern[i])

                if debugLog >= debugLogLevel[-1]:
                    print "INFO: Scan log-> " + log.pattern

                search = re.search(log, line)
                if search:
                    if debugLog >= debugLogLevel[1]:
                        print "INFO: Search Camera log->" + search.group()
                        print "line is: " + line

                    timeFormat = re.compile(r"\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}.\d+")

                    if debugLog >= debugLogLevel[2]:
                        print "INFO: TimeFormat-> " + timeFormat.pattern

                    time = re.search(timeFormat, line)
                    if time:
                        if debugLog >= debugLogLevel[2]:
                            print "INFO: Find key time-> " + time.group()

                            # patch-> cal tag position and write to the right pos
                        if debugLog >= debugLogLevel[1]:
                            tags = search.group()
                            print tags
                            # print 'INFO: pos is '+str(AppLogType.CamLogPos.index(tags))+' time list len is'+str(len(self.__time))

                        pos = AppLogType.CamLogPos.index(search.group()) - len(self.__time)
                        if pos:
                            if debugLog >= debugLogLevel[-1]:
                                print "WARNING: There is " + str(pos) + " data null!!!"

                            for i in range(0, pos):


                        if debugLog >= debugLogLevel[2]:
                            print "INFO: Time -> " + self.__time[i]

                            ## Save one group
                    if i == len(AppLogType.CamLog) - 1:

                        if debugLog >= debugLogLevel[-1]:
                            print "INFO: Record one goup len num -> " + str(len(AppLogType.CamLog) - 1)

                        if debugLog >= debugLogLevel[1]:
                            print self.__time

                            # Save a group

                        # calc KPI

                        self.__time = []