def extract(value):
    if value == '':
        return None
    dt = datetimeparse(value)
    if tz:
        dt = dt.astimezone(tz)
    return dt
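# Throughout these snippets `datetimeparse` is assumed to be an alias for
# dateutil's permissive parser, and free names such as `tz` above (or
# `local_timezone`, `requests`, `exifread`, `feedparser`, ...) are assumed
# to be module-level globals in their original sources. A minimal sketch of
# that setup, enough to exercise extract():

import datetime

from dateutil.parser import parse as datetimeparse

tz = datetime.timezone.utc  # assumed stand-in for the original timezone

# extract('2021-06-01T12:30:00+02:00') -> 2021-06-01 10:30:00+00:00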
def image_datetime(filename):
    """Returns the DateTime of when the image was taken.

    Loads DateTime from EXIF data. Will fail if none is given.
    """
    # exifread expects the file opened in binary mode
    with open(filename, 'rb') as f:
        exif_tags = exifread.process_file(f)
    assert 'Image DateTime' in exif_tags
    return datetimeparse(exif_tags['Image DateTime'].values)
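# Usage sketch for image_datetime; 'photo.jpg' is a hypothetical path, and
# the call assumes the exifread package is installed and the file actually
# carries an 'Image DateTime' EXIF tag (the assert fires otherwise):
#
#   import exifread
#   taken = image_datetime('photo.jpg')
#   print(taken.strftime('%Y-%m-%d %H:%M'))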
def add_rss_feed(self, url):
    self.url = url
    self.post_dict_list = []
    try:
        d = feedparser.parse(self.url)
        self.status = d.status
        self.headers = d.headers
        self.title = d.feed.title
        self.rights = d.feed.rights
        self.subtitle = d.feed.subtitle
        for post in d.entries:
            self.post_dict_list.append({
                'published': str(datetimeparse(post.published)),
                'title': post.title,
                'link': post.link,
                'id': post.id,
            })
    except Exception:
        # parse failed: report and bail out before touching the data store
        message = ('[Fail] add_rss_feed:', self.url)
        if self.debug:
            print(message)
        return message

    try:
        feeds = self.data_store.get('feeds')[self.title]
        if self.debug:
            print('[success] found existing feeds', feeds)
    except Exception:
        if self.debug:
            print('[info] no feeds, adding default', self.title, self.url)
        self.data_store.put('feeds',
                            status=self.status,
                            headers=self.headers,
                            title=self.title,
                            subtitle=self.subtitle,
                            url=self.url,
                            rights=self.rights,
                            entries=json.dumps(self.post_dict_list))
def getAcademicCalendarInfo():
    """Attempts to retrieve academic cal information from file before making a request

    Returns:
        dict: dict with all the academic cal events under attr 'events'
    """
    try:
        response = {}
        success = False
        response = readFromFile()
        # If reading from file fails, make a call
        if 'error' in response:
            if response['error'] == True:
                print(response["message"], ". Fetching new data...")
                response = makeAcademicCalApiCall()
                success = writeToFile(response)
        age = datetime.datetime.now().timestamp() - datetimeparse(
            response['last_fetched']).timestamp()
        # Expiry of information: refresh academic cal data once it is
        # older than 23 hours (age is in seconds)
        if age > (60 * 60 * 23):
            response = makeAcademicCalApiCall()
            if response['error'] == True:
                raise Exception(response['message'])
            success = writeToFile(response)
        # Report in the console whether the file was updated
        if success == False:
            print("File was not updated.")
        return response
    except Exception as e:
        # Error? Return error and message
        print("[getAcademicCalendarInfo] Error: ", e)
        return {"error": True, "message": str(e)}
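# The helpers above (readFromFile / writeToFile / makeAcademicCalApiCall)
# are not shown in this snippet. Below is a hypothetical sketch of the two
# file helpers, under the contract visible here (a JSON dict carrying an
# 'error' flag and a parseable 'last_fetched' timestamp); the real
# implementations may differ:

import datetime
import json

CACHE_PATH = 'academic_cal.json'  # assumed cache location


def readFromFile():
    """Hypothetical: load the cached calendar dict, or flag an error."""
    try:
        with open(CACHE_PATH) as f:
            return json.load(f)
    except (IOError, ValueError):
        return {'error': True, 'message': 'Cache missing or unreadable'}


def writeToFile(response):
    """Hypothetical: stamp and persist the calendar dict; True on success."""
    response['last_fetched'] = datetime.datetime.now().isoformat()
    try:
        with open(CACHE_PATH, 'w') as f:
            json.dump(response, f)
        return True
    except IOError:
        return False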
def show_category(category_url):
    data = []
    response = requests.request(
        method=u'GET',
        url=category_url,
    )
    soup = BeautifulSoup(response.content, 'html.parser')
    videos = soup.findAll(u'div', {u'class': u'broadcast-player'})
    for video in videos:
        data_unique = []
        title = video.findAll(
            u'h1', {u'class': u'broadcast-player-meta__title'})[0].text
        title = " ".join(title.split())
        timecode = video.findAll(u'time')[0][u'datetime']
        parsed_timecode = datetimeparse(timecode)
        label = u'{title}'.format(title=title)
        date = u'{time}'.format(
            time=parsed_timecode.strftime(u'%Y-%m-%d %H:%M'))
        videoJSONStr = video.find("script").contents[0]
        videoJSON = json.loads(videoJSONStr)
        # add each quality version of the video as an entry
        for vid in videoJSON['formats']:
            quality = vid['name']
            if quality not in data_unique:
                data_unique.append(quality)
                file_url = vid['url']['mp4'].replace("&legacy=resolve.php", "")
                data.append({
                    u'label': label + ' - ' + date + ' (' + quality + ')',
                    u'path': {
                        u'endpoint': u'show_video',
                        u'video_url': u'{video_url}'.format(
                            video_url=file_url),
                    },
                    u'is_playable': True,
                })
    return data
def show_category(category_url):
    data = []
    response = requests.request(
        method=u'GET',
        url=category_url,
    )
    # explicit parser avoids bs4's "no parser specified" warning
    soup = BeautifulSoup(response.content, 'html.parser')
    videos = soup.findAll(
        u'li',
        {u'class': u'broadcast-player__playlist__item'},
    )
    for video in videos:
        title = video.findAll(
            u'span',
            {u'class': u'broadcast-link__name '},
        )[0].text
        timecode = video.findAll(u'time')[0][u'datetime']
        parsed_timecode = datetimeparse(timecode)
        label = u'{title} {time}'.format(
            title=title,
            time=parsed_timecode.strftime(u'%Y-%m-%d %H:%M'),
        )
        video_url = video.findAll(u'a')[0][u'href']
        data.append(
            {
                u'label': label,
                u'path': {
                    u'endpoint': u'show_video',
                    u'video_url': u'https://nos.nl{video_url}'.format(
                        video_url=video_url,
                    ),
                },
                u'is_playable': True,
            }
        )
    return data
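# The two show_category variants above appear to target different revisions
# of the same nos.nl broadcast page markup: the first parses the JSON
# payload of an embedded <script> tag and emits one entry per available
# quality, while this one follows each playlist item's <a href> link
# directly. Example call (the category URL is hypothetical):
#
#   items = show_category('https://nos.nl/uitzendingen')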
def removal_id_filter(instance):
    """Filter function to determine which classic event instances need to be removed

    Args:
        instance (event: Dict): a specific instance of a recurring event
            from Google Cal API response

    Returns:
        bool: whether or not the date should be kept or removed. Follows
            built-in filter conventions
    """
    test = datetimeparse(instance['start']['dateTime']).replace(tzinfo=None)
    flag = True
    fall_start = datetimeparse(
        importantEvents['periods']['fall_semester']["start"]["start"]
        ["datetime"]).replace(tzinfo=None)
    fall_end = datetimeparse(
        importantEvents['periods']['fall_semester']["end"]["start"]
        ["datetime"]).replace(tzinfo=None)
    spring_start = datetimeparse(
        importantEvents['periods']['spring_semester']["start"]["start"]
        ["datetime"]).replace(tzinfo=None)
    spring_end = datetimeparse(
        importantEvents['periods']['spring_semester']["end"]["start"]
        ["datetime"]).replace(tzinfo=None)
    # class occurs in fall semester
    if test > fall_start and test < fall_end:
        flag = False
    # class occurs in spring semester
    if test > spring_start and test < spring_end:
        flag = False
    # is on any blacklisted dates
    for holiday in importantEvents['holidays']:
        if test.date() == datetimeparse(holiday).date() and not flag:
            flag = True
    return flag
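# Usage sketch, per the built-in filter() convention the docstring cites
# (True keeps, False drops). Note that with the logic above, True
# corresponds to instances falling outside both semester ranges or on a
# blacklisted holiday. `instances` is a hypothetical list of Google
# Calendar event dicts:
#
#   flagged = list(filter(removal_id_filter, instances))
#   in_session = [i for i in instances if not removal_id_filter(i)]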
def getRate4(url, start='1900-1-1', end='1900-1-1', maxPage=1000,
             reqinterval=0.3):
    start = datetimeparse(start)
    end = datetimeparse(end)
    if start >= end:
        return []
    INFOS = []
    INFO = {
        'siteId': None,
        'shopId': None,
        'aucNumId': None,
        'sku': None,
        'title': None,
        'content': None,
        'date': None,
        'rateId': None,
        'anony': None,
        'nick': None,
        'nickUrl': None,
        'rank': None,
        'rankUrl': None,
        'userId': None,
        'vipLevel': None,
        'validscore': None,
        'appendId': None,
        'appendContent': None,
        'appendAfter': None,
        'appendTime': None,
        'appendReply': None,
        'spuRatting': None,
    }
    r = request(url, requiredPropertyRegx=r'shopId\=\d+')
    s = r.content
    shopId = re.findall(r'shopId\=(\d+)', s)[0]
    siteId = re.findall(r'siteId\=(\d)', s)[0]
    itemId = re.findall(r'\"itemId\"\s*\:\s*\"(\d+)\"', s)[0]
    domain = ''
    page = 1
    # build the paginated review-listing URL; Tmall (siteId == '2') and
    # Taobao expose different endpoints
    if siteId == '2':
        try:
            domain = re.findall(r'\"apiTmallReview\"\:\"(.+?)\"', s)[0]
            domain = domain.replace('\\', '') + '&'
            domain = domain \
                + 'order=1&append=0&currentPage=1&forShop=1&ismore=1&callback=jsonp%d' \
                % (time.time() * 1000)
        except:
            domain = 'http://rate.tmall.com/list_detail_rate.htm'
            params = {
                'callback': 'jsonp%d' % (time.time() * 1000),
                'itemId': re.findall(r'item_id=(\d+)', s)[0],
                'spuId': re.findall(r'spu_id=(\d+)', s)[0],
                'sellerId': re.findall(r'user_num_id=(\d+)', s)[0],
                'order': '1',
                'append': '0',
                'currentPage': '1',
                'ismore': '1',
                'forShop': '1',
            }
            domain = domain + '?' + '&'.join(
                ['%s=%s' % (k, str(v)) for (k, v) in params.items()])
    else:
        domain = re.findall(r'data\-listApi\=\"(.+?)\"', s)[0]
        params = {
            'currentPageNum': 1,
            'rateType': '',
            'orderType': '',
            'showContent': '1',
            'attribute': '',
            'callback': 'jsonp_reviews_list',
        }
        domain = domain + '&' + '&'.join([
            k + '=' + str(v) for (k, v) in params.items()
        ]).replace('showContent=1', 'showContent=')
    rateIds = []
    # page through the review listing until the start date or the last page
    while 1:
        if page > maxPage or page > 200:
            break
        stop = False
        if domain.find('currentPageNum') != -1:
            domain = domain.replace(
                re.findall(r'currentPageNum\=\d+', domain)[0],
                'currentPageNum=' + str(page))
        else:
            domain = domain.replace(
                re.findall(r'currentPage\=\d+', domain)[0],
                'currentPage=' + str(page))
        time.sleep(reqinterval)
        r = request(domain)
        s = r.content
        if siteId == '2':
            result = re.findall(
                r'\{\"aliMallSeller\"\:.+?\"userVipPic\".+?\}', s)
            keys = [
                'aucNumId', 'auctionSku', 'title', 'rateContent',
                'rateDate', 'id', 'anony', 'displayUserNick',
                'displayUserLink', 'displayRateSum', 'displayRatePic',
                'displayUserNumId', 'userVipLevel', 'rateResult',
            ]
        else:
            result = re.findall(r'\{\"append\"\:.+?\"validscore\".+?\}', s)
            keys = [
                'aucNumId', 'sku', 'title', 'content', 'date', 'rateId',
                'anony', 'nick', 'nickUrl', 'rank', 'rankUrl', 'userId',
                'vipLevel', 'validscore',
            ]
        metadata = []
        for (i, t) in enumerate(result):
            metadata = []
            for key in keys:
                t2 = re.findall(r'\"' + key + r'\"\:\"*(.*?)\"*[\,\}]', t)
                if len(t2) > 0:
                    t2 = t2[0]
                    t2 = t2.decode(r.encoding, 'ignore').encode('utf-8')
                else:
                    t2 = None
                metadata.append(t2)
            # anonymous reviewer: blank out the user-link fields
            if metadata[6] == 'true':
                metadata[8] = None
                metadata[10] = None
            # appended (follow-up) review
            if siteId == '2':
                appendComment = re.findall(
                    r'\"appendComment\"\s*\:\s*\{.+?\}', t)
                appendComment = (appendComment[0]
                                 if len(appendComment) > 0 else None)
                if appendComment:
                    metadata.extend([
                        re.findall(r'\"commentId\"\s*\:\s*(\d+)',
                                   appendComment),
                        re.findall(r'\"content\"\s*\:\s*\"(.+?)\"',
                                   appendComment),
                        None,
                        re.findall(r'\"commentTime\"\s*\:\s*\"(.+?)\"',
                                   appendComment),
                        re.findall(r'\"reply\"\s*\:\s*\"(.*?)\"',
                                   appendComment),
                    ])
                else:
                    metadata.extend([None] * 5)
            else:
                appendComment = re.findall(r'\"append\"\s*\:\s*\{.+?\}', t)
                appendComment = (appendComment[0]
                                 if len(appendComment) > 0 else None)
                if appendComment:
                    metadata.extend([
                        None,
                        re.findall(r'\"content\"\s*\:\s*\"(.+?)\"',
                                   appendComment),
                        re.findall(r'\"dayAfterConfirm\"\s*\:\s*\"(.+?)\"',
                                   appendComment),
                        None,
                        re.findall(r'\"reply\"\s*\:\s*\"(.*?)\"',
                                   appendComment),
                    ])
                else:
                    metadata.extend([None] * 5)
            # spuRatting
            spuRatting = re.search(r'("spuRatting"\:\[.+\])', t, re.S)
            if spuRatting:
                spuRatting = spuRatting.group(1).decode(
                    r.encoding).encode('utf-8')
                spuRatting = yaml.load(_parse_json_str(spuRatting))
                spuRatting = ';'.join([
                    '%s:%s' % (spu['name'], spu['desc'])
                    for spu in spuRatting['spuRatting']
                ])
                metadata.append(spuRatting.encode('utf-8'))
            else:
                metadata.append(None)
            # handle regx and encoding
            for i in range(12, 17):
                if not metadata[i]:
                    continue
                metadata[i] = (metadata[i][0]
                               if len(metadata[i]) > 0 else None)
                if metadata[i]:
                    metadata[i] = metadata[i].decode(
                        r.encoding, 'ignore').encode('utf-8')
            # handle end
            metadata[4] = metadata[4].\
                replace('年', '-').\
                replace('月', '-').\
                replace('日', '')
            if datetimeparse(metadata[4]) < end:
                INFOS.append([siteId, shopId, itemId] + metadata[1:])
        page += 1
        if len(INFOS) > 0 and len(metadata) > 0:
            t1 = re.findall(r'\d+.+?\d+.+?\d+', metadata[4])[0]
            t1 = datetimeparse(t1)
            if t1 < start or stop:
                break
        if len(result) < 20:
            break
    return INFOS
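# Date normalization example for the '年'/'月'/'日' replacements above
# (the CJK characters mark year/month/day in the scraped date strings):
#
#   '2014年03月15日'.replace('年', '-').replace('月', '-').replace('日', '')
#   -> '2014-03-15', a form datetimeparse accepts.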
def extract(value):
    if value == '':
        return None
    dt = datetimeparse(value).astimezone(local_timezone).replace(
        tzinfo=None)
    return dt
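# Sketch of the naive-local conversion above: with `local_timezone` assumed
# to be, say, dateutil.tz.gettz('Europe/Amsterdam') (UTC+2 in summer),
#
#   extract('2021-06-01T12:30:00+00:00')
#   -> datetime.datetime(2021, 6, 1, 14, 30)   # naive, local wall time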
def executeItemTradeSchedule(
    schedule_name='Schedule_itemTrade2',
    schedule_error_name='Schedule_itemTradeError',
    schedule_process_name='Schedule_itemTrade_process',
    schedule_update_name='TopItemTradeUpdateTime',
    schedule_config_uri_name='TopItemTradeUri',
    schedule_config_id_name='TopItemShopId',
):
    global RE_CONN, INTERRUPT
    dbConnPool = _reload_slave_db_pool(schedule_name)
    n = 0
    reqinterval = (slave_config.get(IP)['reqinterval']
                   if slave_config.get(IP)
                   else slave_config.get('default')['reqinterval'])
    for i in range(10):
        if INTERRUPT:
            INTERRUPT = False
            break
        # slave config
        _slave_interval(n, 1)
        # keep connection with Redis Server
        itemId = None
        while 1:
            try:
                itemId = RE_CONN.spop(schedule_name)
                RE_CONN.hset(schedule_process_name, itemId, 1)
                n = (n + 1) % 100
                break
            except:
                RE_CONN = getRedisConn2()
        toDate = datetime.strftime(datetime.today(), '%Y/%m/%d')
        if itemId:
            (fromDate, param_uri_meta, config_id_meta) = \
                ibbdlib.redislib.redis_pipe(
                    RE_CONN,
                    [['hget', schedule_update_name, str(itemId)],
                     ['hget', schedule_config_uri_name, str(itemId)],
                     ['hget', schedule_config_id_name, str(itemId)]])
            fromDate = fromDate or '1900/1/1'
            param_uri_meta = (json.loads(unquote(param_uri_meta))
                              if param_uri_meta else {})
            config_id_meta = (json.loads(config_id_meta)
                              if config_id_meta else {})
            _slave_info(schedule_name, 'Start', itemId, fromDate, toDate)
            try:
                t1 = t2 = time.time()
                (data, param_uri, config_id) = getTrades3(
                    'http://item.taobao.com/item.htm?id=%s' % itemId,
                    fromDate,
                    toDate,
                    reqUri=param_uri_meta,
                    config=config_id_meta,
                    reqinterval=reqinterval,
                )
                t2 = time.time()
                if len(data) > 0:
                    re_command = [
                        'HSET', schedule_update_name, itemId,
                        (datetimeparse(data[0][10])
                         + timedelta(1)).strftime('%Y/%m/%d')
                    ]
                    bll_queue.put(
                        (saveTopTrade, data, dbConnPool, None,
                         schedule_name, itemId, schedule_process_name,
                         schedule_error_name, t1, t2, re_command))
                    if not param_uri_meta:
                        RE_CONN.hset(schedule_config_uri_name, str(itemId),
                                     quote(json.dumps(param_uri)))
                    if not config_id_meta:
                        RE_CONN.hset(schedule_config_id_name, str(itemId),
                                     json.dumps(config_id))
                else:
                    _slave_info(
                        schedule_name,
                        'Success',
                        itemId,
                        'len',
                        0,
                        'req',
                        '%.1f' % (t2 - t1),
                    )
                    RE_CONN.hdel(schedule_process_name, str(itemId))
            except Exception as e:
                traceback.print_exc()
                _slave_error(
                    schedule_name,
                    'Error',
                    itemId,
                    str(e),
                    'req',
                    '%.1f' % (t2 - t1),
                    'save',
                    '%.1f' % (time.time() - t2),
                )
                RE_CONN.hset(schedule_error_name, itemId,
                             json.dumps({
                                 'SlaveID': SLAVEID,
                                 'msg': str(e)
                             }))
        else:
            break
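# The Redis claim pattern used above, in isolation: spop() atomically takes
# one pending item id from the schedule set, hset() marks it in-flight in a
# process hash, and the id is later either deleted from that hash on
# success or recorded in the error hash on failure. A minimal sketch of the
# same pattern with redis-py (key names are this function's defaults; the
# client setup is an assumption):
#
#   import redis
#   conn = redis.Redis()
#   item_id = conn.spop('Schedule_itemTrade2')
#   if item_id:
#       conn.hset('Schedule_itemTrade_process', item_id, 1)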
        data = cur.fetchall()
        cur.close()
        self.conn.commit()
        toreturn = []
        # I'm sure this could be done in SQL but this way is simpler
        length = len(data) - 1
        for index, field in enumerate(data):
            if field[2]:
                index += 1
                # collapse consecutive rows that share the same flag
                while index <= length and data[index][2]:
                    data.pop(index)
                    length -= 1
                time = ((data[index][3] - field[3]) if index <= length
                        else (datetime.datetime.now(pytz.utc) - field[3]))
                try:
                    if 'until' not in args or \
                            datetimeparse(args['until']) >= field[3]:
                        if 'from' not in args or \
                                datetimeparse(args['from']) <= field[3]:
                            toreturn.append({
                                'login_time': field[3].isoformat(),
                                'online_time': str(time),
                            })
                except ValueError as e:
                    return [str(e)]
        return toreturn


# Similar to players, but provides total session information
class times_handler(base_handler):

    @classmethod
    def handle_category(self, args):
        cur = self.conn.cursor()
        sql = 'SELECT DISTINCT player_name FROM skynet_events;'
def getImportantEvents():
    """Uses academic calendar information to determine holidays and semester periods

    Returns:
        dict: list of all important events, periods of fall and spring
            semester, and list of blacklisted dates
    """
    academicCal = getAcademicCalendarInfo()
    importantEvents = []
    result = {}
    # Checks to see that we actually have academic calendar information
    if academicCal["error"]:
        return
    # Get all the important dates that we are looking for in the calendar
    for event in academicCal["events"]:
        if event["summary"].find("Classes Begin") != -1:
            importantEvents.append(event)
        if event["summary"].find("Classes End") != -1:
            importantEvents.append(event)
        if event["summary"].find("Holiday:") != -1:
            importantEvents.append(event)
        if event["summary"].find("Thanksgiving") != -1:
            importantEvents.append(event)
        if event["summary"].find("Spring Break") != -1:
            importantEvents.append(event)
        if event["summary"].find("Classes Resume") != -1:
            importantEvents.append(event)
        if event["summary"].find("Easter") != -1:
            importantEvents.append(event)
    result["important_events"] = importantEvents
    # Calculate semester periods
    fall_semester = []
    spring_semester = []
    holidays = []
    # Checks to see which events fall in which semester
    for event in importantEvents:
        event_date = datetimeparse(event['start']['datetime'])
        if event_date.month > 7:
            fall_semester.append(event)
        else:
            spring_semester.append(event)
        if event['summary'].find("Holiday") != -1:
            holidays.append(event['start']['datetime'])
    # Get the start and end periods
    periods = {
        "fall_semester": getStartEnd(fall_semester),
        "spring_semester": getStartEnd(spring_semester)
    }
    # Get Thanksgiving dates
    thanksgiving_period = [
        event for event in fall_semester
        if event["summary"].find("Thanksgiving") != -1
        or event["summary"].find("Classes Resume") != -1
    ]
    # Get Spring Break dates
    springbreak_period = [
        event for event in spring_semester
        if event["summary"].find("Spring Break") != -1 or (
            event["summary"].find("Classes Resume") != -1
            and event['start']['datetime'].split("-")[1] == "03")
    ]
    # Get Easter Break dates
    easterbreak_period = [
        event for event in spring_semester
        if event["summary"].find("Easter") != -1 or (
            event["summary"].find("Classes Resume") != -1
            and event['start']['datetime'].split("-")[1] == "04")
    ]
    if len(thanksgiving_period) == 2 and len(springbreak_period) == 2 \
            and len(easterbreak_period) == 2:
        # Processing Spring Break and Thanksgiving break as they follow
        # the same school policy
        for breakperiod in [thanksgiving_period, springbreak_period]:
            start = datetimeparse(
                breakperiod[0]["start"]["datetime"]) + datetime.timedelta(
                    days=1)
            end = datetimeparse(breakperiod[1]["start"]["datetime"])
            while start != end:
                holidays.append(start.strftime("%Y-%m-%d"))
                start = start + datetime.timedelta(days=1)
        # Processing Easter break dates
        easter_start = datetimeparse(
            easterbreak_period[0]["start"]["datetime"])
        easter_end = datetimeparse(easterbreak_period[1]["start"]["datetime"])
        while easter_start != easter_end:
            holidays.append(easter_start.strftime("%Y-%m-%d"))
            easter_start = easter_start + datetime.timedelta(days=1)
    else:
        print("Break period arrays are not long enough.")
    result["periods"] = periods
    result["holidays"] = holidays
    return result
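# Note how the pieces fit together: the 'holidays' list built above holds
# plain date strings (e.g. '2021-11-25'), which is exactly the shape
# removal_id_filter consumes via datetimeparse(holiday).date(), and
# result['periods'] feeds its semester range checks. Hypothetical wiring
# (`instances` stands in for recurring-event dicts from the Calendar API):
#
#   importantEvents = getImportantEvents()  # module-level dict the filter reads
#   flagged = list(filter(removal_id_filter, instances))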