Example #1
def extract(value):
    if value == '':
        return None
    # `datetimeparse` and `tz` come from the enclosing scope:
    # parse the string, then convert when a target timezone is configured
    dt = datetimeparse(value)
    if tz:
        dt = dt.astimezone(tz)
    return dt
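The snippet relies on two names from its enclosing scope. A self-contained sketch, assuming `datetimeparse` is `dateutil.parser.parse` and `tz` is a `zoneinfo` timezone (both are assumptions, not shown in the original):

from dateutil.parser import parse as datetimeparse
from zoneinfo import ZoneInfo  # Python 3.9+

tz = ZoneInfo('Europe/Amsterdam')  # assumed configuration

def extract(value):
    if value == '':
        return None
    dt = datetimeparse(value)
    if tz:
        dt = dt.astimezone(tz)
    return dt

print(extract('2021-06-01T12:00:00+00:00'))  # 2021-06-01 14:00:00+02:00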
Example #2
def extract(value):
    if value == '':
        return None
    dt = datetimeparse(value)
    if tz:
        dt = dt.astimezone(tz)
    return dt
Example #3
def image_datetime(filename):
    """Returns the DateTime of when the image was taken.

    Loads DateTime from EXIF data. Will fail if none is given.
    """
    # exifread needs the file opened in binary mode
    with open(filename, 'rb') as f:
        exif_tags = exifread.process_file(f)
        assert 'Image DateTime' in exif_tags
        return datetimeparse(exif_tags['Image DateTime'].values)
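A quick usage sketch, assuming `datetimeparse` is `dateutil.parser.parse` and that `photo.jpg` (a hypothetical path) carries EXIF data:

import exifread
from dateutil.parser import parse as datetimeparse

print(image_datetime('photo.jpg').isoformat())  # e.g. 2016-07-04T13:21:05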
Example #4
    def add_rss_feed(self, url):
        self.url = url
        self.post_dict_list = []

        try:
            d = feedparser.parse(self.url)

            self.status = d.status
            self.headers = d.headers
            self.title = d.feed.title
            self.rights = d.feed.rights
            self.subtitle = d.feed.subtitle

            for post in d.entries:
                self.post_dict_list.append({'published': str(datetimeparse(post.published)),
                                            'title': post.title,
                                            'link': post.link,
                                            'id': post.id})
        except Exception:
            # fetching or parsing failed; report and bail out
            message = ('[Fail] add_rss_feed', self.url)
            if self.debug:
                print '[fail] add_rss_feed', self.url
            return message

        try:
            feeds = self.data_store.get('feeds')[self.title]
            if self.debug:
                print '[success] found existing feeds', feeds
        except Exception:
            if self.debug:
                print '[info] no feeds, adding default', self.title, self.url

            self.data_store.put('feeds',
                                status=self.status,
                                headers=self.headers,
                                title=self.title,
                                subtitle=self.subtitle,
                                url=self.url,
                                rights=self.rights,
                                entries=json.dumps(self.post_dict_list))
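The method's core step is turning each entry's `published` string into a datetime. A standalone sketch of just that step, assuming `datetimeparse` is `dateutil.parser.parse` and a hypothetical feed URL:

import json

import feedparser
from dateutil.parser import parse as datetimeparse

d = feedparser.parse('https://example.com/feed.xml')  # hypothetical feed
post_dict_list = [{'published': str(datetimeparse(post.published)),
                   'title': post.title,
                   'link': post.link,
                   'id': post.id} for post in d.entries]
print(json.dumps(post_dict_list[:1], indent=2))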
Example #5
def getAcademicCalendarInfo():
    """Attempts to retrieve academic cal information from file before making request

    Returns:
        dict: dict with all the academic cal events under attr 'events'
    """

    try:
        success = False
        response = readFromFile()

        # If reading from file failed, fall back to a fresh API call
        if response.get('error'):
            print(response["message"] + ". Fetching new data...")
            response = makeAcademicCalApiCall()
            success = writeToFile(response)

        age = datetime.datetime.now().timestamp() - datetimeparse(
            response['last_fetched']).timestamp()

        # Cached information expires after 23 hours; refetch past that age
        if age > (60 * 60 * 23):
            response = makeAcademicCalApiCall()
            if response['error']:
                raise Exception(response['message'])
            success = writeToFile(response)

        # Report in the console whether the cache file was updated
        if not success:
            print("File was not updated.")

        return response

    except Exception as e:
        # On failure, return an error flag and the message
        print("[getAcademicCalendarInfo] Error:", e)
        return {"error": True, "message": str(e)}
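The freshness check is a generic stale-cache pattern: compare the current time against the stored `last_fetched` stamp. A minimal sketch, assuming `datetimeparse` is `dateutil.parser.parse`:

import datetime

from dateutil.parser import parse as datetimeparse

def is_stale(last_fetched, max_age=60 * 60 * 23):
    """True when the cached payload is older than max_age seconds."""
    age = datetime.datetime.now().timestamp() - datetimeparse(last_fetched).timestamp()
    return age > max_age

print(is_stale('2020-01-01T00:00:00'))  # True for a long-past timestamp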
Example #6
def show_category(category_url):
    data = []
    response = requests.request(
        method=u'GET',
        url=category_url,
    )
    soup = BeautifulSoup(response.content, 'html.parser')
    videos = soup.findAll(u'div', {u'class': u'broadcast-player'})

    for video in videos:
        data_unique = []
        title = video.findAll(
            u'h1', {u'class': u'broadcast-player-meta__title'})[0].text
        title = " ".join(title.split())
        timecode = video.findAll(u'time')[0][u'datetime']
        parsed_timecode = datetimeparse(timecode)
        label = u'{title}'.format(title=title)
        date = u'{time}'.format(
            time=parsed_timecode.strftime(u'%Y-%m-%d %H:%M'))
        videoJSONStr = video.find("script").contents[0]
        videoJSON = json.loads(videoJSONStr)

        # add each quality version of the video as an entry
        for vid in videoJSON['formats']:
            quality = vid['name']
            if quality not in data_unique:
                data_unique.append(quality)
                file_url = vid['url']['mp4'].replace("&legacy=resolve.php", "")
                data.append({
                    u'label': label + ' - ' + date + ' (' + quality + ')',
                    u'path': {
                        u'endpoint': u'show_video',
                        u'video_url':
                        u'{video_url}'.format(video_url=file_url),
                    },
                    u'is_playable': True,
                })
    return data
Example #7
def show_category(category_url):
    data = []
    response = requests.request(
        method=u'GET',
        url=category_url,
    )
    soup = BeautifulSoup(response.content, 'html.parser')
    videos = soup.findAll(
        u'li',
        {u'class': u'broadcast-player__playlist__item'},
    )
    for video in videos:
        title = video.findAll(
            u'span',
            {u'class': u'broadcast-link__name '},
        )[0].text
        timecode = video.findAll(u'time')[0][u'datetime']
        parsed_timecode = datetimeparse(timecode)
        label = u'{title} {time}'.format(
            title=title,
            time=parsed_timecode.strftime(u'%Y-%m-%d %H:%M'),
        )
        video_url = video.findAll(u'a')[0][u'href']
        data.append(
            {
                u'label': label,
                u'path': {
                    u'endpoint': u'show_video',
                    u'video_url': u'https://nos.nl{video_url}'.format(
                        video_url=video_url,
                    ),
                },
                u'is_playable': True,
            }
        )
    return data
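Both scrapers reduce to the same step: read the machine-readable `datetime` attribute of a `<time>` element and parse it. A self-contained sketch, assuming `datetimeparse` is `dateutil.parser.parse` (the HTML fragment is illustrative):

from bs4 import BeautifulSoup
from dateutil.parser import parse as datetimeparse

html = u'<time datetime="2019-05-04T20:00:00+02:00">zaterdag</time>'
soup = BeautifulSoup(html, 'html.parser')
timecode = soup.findAll(u'time')[0][u'datetime']
print(datetimeparse(timecode).strftime(u'%Y-%m-%d %H:%M'))  # 2019-05-04 20:00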
Example #8
    def removal_id_filter(instance):
        """Filter function to determine which classic event instances need to be removed

        Args:
            instance (event: Dict): a specific instance of a recurring event from Google Cal API response

        Returns:
            bool: whether or not the date should be kept of removed. Follows in-built filter conventions
        """

        test = datetimeparse(
            instance['start']['dateTime']).replace(tzinfo=None)
        flag = True

        fall_start = datetimeparse(
            importantEvents['periods']['fall_semester']["start"]["start"]
            ["datetime"]).replace(tzinfo=None)
        fall_end = datetimeparse(
            importantEvents['periods']['fall_semester']["end"]["start"]
            ["datetime"]).replace(tzinfo=None)

        spring_start = datetimeparse(
            importantEvents['periods']['spring_semester']["start"]["start"]
            ["datetime"]).replace(tzinfo=None)
        spring_end = datetimeparse(
            importantEvents['periods']['spring_semester']["end"]["start"]
            ["datetime"]).replace(tzinfo=None)

        # class occurs in fall semester
        if test > fall_start and test < fall_end:
            flag = False

        # class occurs in spring semester
        if test > spring_start and test < spring_end:
            flag = False

        # instances on blacklisted (holiday) dates are flagged back to True
        for holiday in importantEvents['holidays']:
            if test.date() == datetimeparse(holiday).date() and not flag:
                flag = True

        return flag
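Per built-in `filter` conventions, instances for which the function returns True are kept. A hedged usage sketch (`instances` stands in for the recurring-event instances from the Google Cal API, as in the original):

# `instances` is assumed to be the API's list of event instances
kept = [inst for inst in instances if removal_id_filter(inst)]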
Example #9
def getRate4(url,
             start='1900-1-1',
             end='1900-1-1',
             maxPage=1000,
             reqinterval=0.3):

    start = datetimeparse(start)
    end = datetimeparse(end)
    if start >= end:
        return []
    INFOS = []
    # field layout of each record appended to INFOS (kept for reference)
    INFO = {
        'siteId': None,
        'shopId': None,
        'aucNumId': None,
        'sku': None,
        'title': None,
        'content': None,
        'date': None,
        'rateId': None,
        'anony': None,
        'nick': None,
        'nickUrl': None,
        'rank': None,
        'rankUrl': None,
        'userId': None,
        'vipLevel': None,
        'validscore': None,
        'appendId': None,
        'appendContent': None,
        'appendAfter': None,
        'appendTime': None,
        'appendReply': None,
        'spuRatting': None
    }
    r = request(url, requiredPropertyRegx=r'shopId\=\d+')
    s = r.content
    shopId = re.findall(r'shopId\=(\d+)', s)[0]
    siteId = re.findall(r'siteId\=(\d)', s)[0]
    itemId = re.findall(r'\"itemId\"\s*\:\s*\"(\d+)\"', s)[0]
    domain = ''
    page = 1
    if siteId == '2':
        try:
            domain = re.findall(r'\"apiTmallReview\"\:\"(.+?)\"', s)[0]
            domain = domain.replace('\\', '') + '&'
            domain = domain \
                + 'order=1&append=0&currentPage=1&forShop=1&ismore=1&callback=jsonp%d' \
                % (time.time() * 1000)
        except Exception:
            domain = 'http://rate.tmall.com/list_detail_rate.htm'
            params = {
                'callback': 'jsonp%d' % (time.time() * 1000),
                'itemId': re.findall(r'item_id=(\d+)', s)[0],
                'spuId': re.findall(r'spu_id=(\d+)', s)[0],
                'sellerId': re.findall(r'user_num_id=(\d+)', s)[0],
                'order': '1',
                'append': '0',
                'currentPage': '1',
                'ismore': '1',
                'forShop': '1',
            }
            domain = domain + '?' + '&'.join(
                ['%s=%s' % (k, str(v)) for (k, v) in params.items()])
    else:
        domain = re.findall(r'data\-listApi\=\"(.+?)\"', s)[0]
        params = {
            'currentPageNum': 1,
            'rateType': '',
            'orderType': '',
            'showContent': '1',
            'attribute': '',
            'callback': 'jsonp_reviews_list',
        }
        domain = domain + '&' + '&'.join([
            k + '=' + str(v) for (k, v) in params.items()
        ]).replace('showContent=1', 'showContent=')
    rateIds = []
    while 1:
        if page > maxPage or page > 200:
            break
        stop = False
        if domain.find('currentPageNum') != -1:
            domain = domain.replace(
                re.findall(r'currentPageNum\=\d+', domain)[0],
                'currentPageNum=' + str(page))
        else:
            domain = domain.replace(
                re.findall(r'currentPage\=\d+', domain)[0],
                'currentPage=' + str(page))
        time.sleep(reqinterval)
        r = request(domain)
        s = r.content
        if siteId == '2':
            result = re.findall(r'\{\"aliMallSeller\"\:.+?\"userVipPic\".+?\}',
                                s)
            keys = [
                'aucNumId',
                'auctionSku',
                'title',
                'rateContent',
                'rateDate',
                'id',
                'anony',
                'displayUserNick',
                'displayUserLink',
                'displayRateSum',
                'displayRatePic',
                'displayUserNumId',
                'userVipLevel',
                'rateResult',
            ]
        else:
            result = re.findall(r'\{\"append\"\:.+?\"validscore\".+?\}', s)
            keys = [
                'aucNumId',
                'sku',
                'title',
                'content',
                'date',
                'rateId',
                'anony',
                'nick',
                'nickUrl',
                'rank',
                'rankUrl',
                'userId',
                'vipLevel',
                'validscore',
            ]
        metadata = []
        for (i, t) in enumerate(result):
            metadata = []
            for key in keys:
                t2 = re.findall(r'\"' + key + r'\"\:\"*(.*?)\"*[\,\}]', t)
                if len(t2) > 0:
                    t2 = t2[0]
                    t2 = t2.decode(r.encoding, 'ignore').encode('utf-8')
                else:
                    t2 = None
                metadata.append(t2)
            if metadata[6] == 'true':
                # anonymous reviews: blank out the nick and rank links
                metadata[8] = None
                metadata[10] = None

            # appended (follow-up) review comments

            if siteId == '2':
                appendComment = re.findall(r'\"appendComment\"\s*\:\s*\{.+?\}',
                                           t)
                appendComment = (appendComment[0]
                                 if len(appendComment) > 0 else None)
                if appendComment:
                    metadata.extend([
                        re.findall(r'\"commentId\"\s*\:\s*(\d+)',
                                   appendComment),
                        re.findall(r'\"content\"\s*\:\s*\"(.+?)\"',
                                   appendComment), None,
                        re.findall(r'\"commentTime\"\s*\:\s*\"(.+?)\"',
                                   appendComment),
                        re.findall(r'\"reply\"\s*\:\s*\"(.*?)\"',
                                   appendComment)
                    ])
                else:
                    metadata.extend([None] * 5)
            else:
                appendComment = re.findall(r'\"append\"\s*\:\s*\{.+?\}', t)
                appendComment = (appendComment[0]
                                 if len(appendComment) > 0 else None)
                if appendComment:
                    metadata.extend([
                        None,
                        re.findall(r'\"content\"\s*\:\s*\"(.+?)\"',
                                   appendComment),
                        re.findall(r'\"dayAfterConfirm\"\s*\:\s*\"(.+?)\"',
                                   appendComment), None,
                        re.findall(r'\"reply\"\s*\:\s*\"(.*?)\"',
                                   appendComment)
                    ])
                else:
                    metadata.extend([None] * 5)

            # spuRatting
            spuRatting = re.search(r'("spuRatting"\:\[.+\])', t, re.S)
            if spuRatting:
                spuRatting = spuRatting.group(1).decode(
                    r.encoding).encode('utf-8')
                spuRatting = yaml.safe_load(_parse_json_str(spuRatting))
                spuRatting = ';'.join([
                    '%s:%s' % (spu['name'], spu['desc'])
                    for spu in spuRatting['spuRatting']
                ])
                metadata.append(spuRatting.encode('utf-8'))
            else:
                metadata.append(None)

            # resolve regex matches and normalize encoding for the appended fields
            # (use j here so the outer enumerate counter i is not shadowed)
            for j in range(12, 17):
                if not metadata[j]:
                    continue
                metadata[j] = (metadata[j][0]
                               if len(metadata[j]) > 0 else None)
                if metadata[j]:
                    metadata[j] = metadata[j].decode(r.encoding,
                                                     'ignore').encode('utf-8')

            # normalize Chinese-format dates (年/月/日) to 'YYYY-MM-DD'
            metadata[4] = metadata[4].\
                replace('年', '-').\
                replace('月', '-').\
                replace('日', '')

            if datetimeparse(metadata[4]) < end:
                INFOS.append([siteId, shopId, itemId] + metadata[1:])
        page += 1
        if len(INFOS) > 0 and len(metadata) > 0:
            t1 = re.findall(r'\d+.+?\d+.+?\d+', metadata[4])[0]
            t1 = datetimeparse(t1)
            if t1 < start or stop:
                break
        if len(result) < 20:
            break
    return INFOS
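The crawl keeps a review only while its date is inside the requested window, and stops paging once dates fall before `start`. A self-contained sketch of that window check, assuming `datetimeparse` is `dateutil.parser.parse`:

from dateutil.parser import parse as datetimeparse

start = datetimeparse('2015-1-1')
end = datetimeparse('2015-6-1')
rate_date = datetimeparse('2015-03-02')  # a parsed review date
print(start <= rate_date < end)  # True: the review is inside the window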
Example #10
def extract(value):
    if value == '':
        return None
    # convert to local time, then drop tzinfo to store a naive datetime
    dt = datetimeparse(value).astimezone(local_timezone).replace(
        tzinfo=None)
    return dt
Example #11
def executeItemTradeSchedule(
    schedule_name='Schedule_itemTrade2',
    schedule_error_name='Schedule_itemTradeError',
    schedule_process_name='Schedule_itemTrade_process',
    schedule_update_name='TopItemTradeUpdateTime',
    schedule_config_uri_name='TopItemTradeUri',
    schedule_config_id_name='TopItemShopId',
):

    global RE_CONN, INTERRUPT
    dbConnPool = _reload_slave_db_pool(schedule_name)
    n = 0
    reqinterval = (slave_config.get(IP)['reqinterval'] if slave_config.get(IP)
                   else slave_config.get('default')['reqinterval'])

    for i in range(10):
        if INTERRUPT:
            INTERRUPT = False
            break
        # slave config
        _slave_interval(n, 1)

        # keep connection with Redis Server

        itemId = None
        while 1:
            try:
                itemId = RE_CONN.spop(schedule_name)
                RE_CONN.hset(schedule_process_name, itemId, 1)
                n = (n + 1) % 100
                break
            except Exception:
                # the Redis connection can drop; reconnect and retry
                RE_CONN = getRedisConn2()
        toDate = datetime.strftime(datetime.today(), '%Y/%m/%d')
        if itemId:
            (fromDate, param_uri_meta,
             config_id_meta) = ibbdlib.redislib.redis_pipe(
                 RE_CONN, [['hget', schedule_update_name,
                            str(itemId)],
                           ['hget', schedule_config_uri_name,
                            str(itemId)],
                           ['hget', schedule_config_id_name,
                            str(itemId)]])
            fromDate = fromDate or '1900/1/1'
            param_uri_meta = (json.loads(unquote(param_uri_meta))
                              if param_uri_meta else {})
            config_id_meta = (json.loads(config_id_meta)
                              if config_id_meta else {})
            _slave_info(schedule_name, 'Start', itemId, fromDate, toDate)
            try:
                t1 = t2 = time.time()
                (data, param_uri, config_id) = getTrades3(
                    'http://item.taobao.com/item.htm?id=%s' % itemId,
                    fromDate,
                    toDate,
                    reqUri=param_uri_meta,
                    config=config_id_meta,
                    reqinterval=reqinterval,
                )
                t2 = time.time()

                if len(data) > 0:

                    re_command = [
                        'HSET', schedule_update_name, itemId,
                        (datetimeparse(data[0][10]) +
                         timedelta(1)).strftime('%Y/%m/%d')
                    ]

                    bll_queue.put(
                        (saveTopTrade, data, dbConnPool, None, schedule_name,
                         itemId, schedule_process_name, schedule_error_name,
                         t1, t2, re_command))
                    if not param_uri_meta:
                        RE_CONN.hset(schedule_config_uri_name, str(itemId),
                                     quote(json.dumps(param_uri)))
                    if not config_id_meta:
                        RE_CONN.hset(schedule_config_id_name, str(itemId),
                                     json.dumps(config_id))
                else:
                    _slave_info(
                        schedule_name,
                        'Success',
                        itemId,
                        'len',
                        0,
                        'req',
                        '%.1f' % (t2 - t1),
                    )
                    RE_CONN.hdel(schedule_process_name, str(itemId))
            except Exception, e:
                traceback.print_exc()
                _slave_error(
                    schedule_name,
                    'Error',
                    itemId,
                    str(e),
                    'req',
                    '%.1f' % (t2 - t1),
                    'save',
                    '%.1f' % (time.time() - t2),
                )
                RE_CONN.hset(schedule_error_name, itemId,
                             json.dumps({
                                 'SlaveID': SLAVEID,
                                 'msg': str(e)
                             }))
        else:
            break
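The scheduler uses a Redis set as a work queue: `spop` claims one pending item and `hset` marks it in-process. A minimal sketch of the claim step with redis-py, assuming a reachable local Redis server (the key names mirror the original):

import redis

r = redis.Redis()  # assumes a local Redis server
r.sadd('Schedule_itemTrade2', '12345')
item_id = r.spop('Schedule_itemTrade2')           # claim one pending item
r.hset('Schedule_itemTrade_process', item_id, 1)  # mark it in-process
print(item_id)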
Example #12
def extract(value):
    if value == '':
        return None
    dt = datetimeparse(value).astimezone(
        local_timezone).replace(tzinfo=None)
    return dt
Example #13
		data = cur.fetchall()
		cur.close()
		self.conn.commit()
		toreturn = []
		#I'm sure this could be done in SQL but this way is simpler
		length = len(data)-1
		for index, field in enumerate(data):
			if field[2]:
				index+=1
				while index<=length and data[index][2]:
					data.pop(index)
					length-=1
				time = (data[index][3] - field[3]) if index<=length else (datetime.datetime.now(pytz.utc)-field[3])
				try:
					if 'until' not in args or datetimeparse(args['until']) >= field[3]:
						if 'from' not in args or datetimeparse(args['from']) <= field[3]:
							toreturn.append({
								'login_time': field[3].isoformat(),
								'online_time': str(time),
							})
				except ValueError, e:
					return [str(e),]
		return toreturn

#Similar to players, but provides total session information
class times_handler(base_handler):
	@classmethod
	def handle_category(self, args):
		cur = self.conn.cursor()
		sql = 'SELECT DISTINCT player_name FROM skynet_events;'
Example #14
def getImportantEvents():
    """Uses academic calendar information to determine holidays and semester periods

    Returns:
        dict: list of all important events, periods of fall and spring semester, and list of blacklisted dates
    """

    academicCal = getAcademicCalendarInfo()
    importantEvents = []
    result = {}

    # Bail out if we do not actually have academic calendar information
    if academicCal.get("error"):
        return

    # Collect the important dates we are looking for in the calendar
    # (one append per event, even when a summary matches several keywords)
    keywords = ("Classes Begin", "Classes End", "Holiday:", "Thanksgiving",
                "Spring Break", "Classes Resume", "Easter")
    for event in academicCal["events"]:
        if any(event["summary"].find(keyword) != -1 for keyword in keywords):
            importantEvents.append(event)

    result["important_events"] = importantEvents

    # Calculate semester periods
    fall_semester = []
    spring_semester = []
    holidays = []

    # Bucket each event into the fall or spring semester by month
    for event in importantEvents:
        event_date = datetimeparse(event['start']['datetime'])

        if event_date.month > 7:
            fall_semester.append(event)
        else:
            spring_semester.append(event)

        if event['summary'].find("Holiday") != -1:
            holidays.append(event['start']['datetime'])

    # Get the start and end periods
    periods = {
        "fall_semester": getStartEnd(fall_semester),
        "spring_semester": getStartEnd(spring_semester)
    }

    # Get Thanksgiving dates
    thanksgiving_period = [
        event for event in fall_semester
        if event["summary"].find("Thanksgiving") != -1
        or event["summary"].find("Classes Resume") != -1
    ]

    # Get Spring Break dates
    springbreak_period = [
        event for event in spring_semester
        if event["summary"].find("Spring Break") != -1 or (
            event["summary"].find("Classes Resume") != -1
            and event['start']['datetime'].split("-")[1] == "03")
    ]

    # Get Easter Break dates
    easterbreak_period = [
        event for event in spring_semester
        if event["summary"].find("Easter") != -1 or (
            event["summary"].find("Classes Resume") != -1
            and event['start']['datetime'].split("-")[1] == "04")
    ]

    if len(thanksgiving_period) == 2 and len(springbreak_period) == 2 and len(
            easterbreak_period) == 2:
        # Processing spring break and thanksgiving break as they follow the same school policy
        for breakperiod in [thanksgiving_period, springbreak_period]:
            start = datetimeparse(
                breakperiod[0]["start"]["datetime"]) + datetime.timedelta(
                    days=1)
            end = datetimeparse(breakperiod[1]["start"]["datetime"])

            while start != end:
                holidays.append(start.strftime("%Y-%m-%d"))
                start = start + datetime.timedelta(days=1)

        # Processing Easter break dates
        easter_start = datetimeparse(
            easterbreak_period[0]["start"]["datetime"])
        easter_end = datetimeparse(easterbreak_period[1]["start"]["datetime"])

        while easter_start != easter_end:
            holidays.append(easter_start.strftime("%Y-%m-%d"))
            easter_start = easter_start + datetime.timedelta(days=1)
    else:
        print("Break period arrays are not long enough.")

    result["periods"] = periods
    result["holidays"] = holidays

    return result
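Each break-expansion loop follows the same pattern: walk day by day from the break's start up to (but not including) the day classes resume, collecting 'YYYY-MM-DD' strings. A self-contained sketch, assuming `datetimeparse` is `dateutil.parser.parse`:

import datetime

from dateutil.parser import parse as datetimeparse

def expand_holidays(start_str, end_str):
    """Collect every date from start up to, but excluding, end."""
    day, end = datetimeparse(start_str), datetimeparse(end_str)
    days = []
    while day != end:
        days.append(day.strftime("%Y-%m-%d"))
        day += datetime.timedelta(days=1)
    return days

print(expand_holidays("2020-11-25", "2020-11-28"))
# ['2020-11-25', '2020-11-26', '2020-11-27']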