コード例 #1
0
 def save():
     """Write the crawler state to ``record.pkl``.

     Dumps ``self.startId``, ``self.lastQuery``, ``self.cache`` and
     ``self.maybe``, in that order, as four consecutive protocol-0 pickles.
     ``self`` is not a parameter; it is resolved from the enclosing scope.
     An ``IOError`` is logged through ``menulog.debug`` and swallowed.
     """
     try:
         # The context manager closes the file even when a dump fails
         # part-way; the former explicit close() was skipped in that case.
         with open('record.pkl', 'wb') as fs:
             pickle.dump(self.startId, fs, 0)
             pickle.dump(self.lastQuery, fs, 0)
             pickle.dump(self.cache, fs, 0)
             pickle.dump(self.maybe, fs, 0)
     except IOError:
         menulog.debug(u'保存缓存失败')
コード例 #2
0
ファイル: bgtask.py プロジェクト: a4963290/neteaseMenu
 def save():
     """Persist the search state to ``record.pkl``.

     Four protocol-0 pickles are written back to back: ``self.startId``,
     ``self.lastQuery``, ``self.cache`` and ``self.maybe``. ``self`` comes
     from the enclosing scope, not from a parameter. A failing write
     (``IOError``) is only logged through ``menulog.debug``.
     """
     try:
         # ``with`` guarantees the handle is released if any dump raises.
         with open('record.pkl', 'wb') as fs:
             pickle.dump(self.startId, fs, 0)
             pickle.dump(self.lastQuery, fs, 0)
             pickle.dump(self.cache, fs, 0)
             pickle.dump(self.maybe, fs, 0)
     except IOError:
         menulog.debug(u'保存缓存失败')
コード例 #3
0
ファイル: bgtask.py プロジェクト: arcsun/neteaseMenu
def getWebContent(url):
    """Fetch a page and return its body decoded as UTF-8.

    ``&companyId=1`` is appended to *url* and the request is sent with a
    YiXin/Android mobile User-Agent header.

    Returns the decoded text, or ``''`` when anything goes wrong; the error
    is logged through ``menulog.debug``.
    """
    try:
        req = urllib2.Request(url + '&companyId=1')
        req.add_header('User-Agent', 'Mozilla/5.0 (Linux; Android 6.0; PRO 6 Build/MRA58K; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/44.0.2403.130 Mobile Safari/537.36 YiXin/4.8.3')
        res = urllib2.urlopen(req)
        try:
            return res.read().decode('utf-8')
        finally:
            # Release the connection even if read()/decode() raises.
            res.close()
    except Exception as e:
        # Best-effort fetch: callers treat '' as "no content".
        menulog.debug(str(e))
        return ''
コード例 #4
0
ファイル: bgtask.py プロジェクト: arcsun/neteaseMenu
def getWebContent(url):
    """Download *url* (plus ``&companyId=1``) and return the UTF-8 text.

    The request carries a YiXin/Android mobile User-Agent header.

    Returns the decoded page, or ``''`` on any failure; the failure is
    logged through ``menulog.debug``.
    """
    try:
        req = urllib2.Request(url + '&companyId=1')
        req.add_header(
            'User-Agent',
            'Mozilla/5.0 (Linux; Android 6.0; PRO 6 Build/MRA58K; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/44.0.2403.130 Mobile Safari/537.36 YiXin/4.8.3'
        )
        res = urllib2.urlopen(req)
        try:
            html = res.read().decode('utf-8')
        finally:
            # Always close the response so the socket is not leaked.
            res.close()
        return html
    except Exception as e:
        # Best-effort fetch: an empty string signals "nothing retrieved".
        menulog.debug(str(e))
        return ''
コード例 #5
0
def addOne(page=1):
    """Bump the visit counter for a page.

    ``page == 0`` selects the ``count_home`` counter and ``page == 1`` the
    ``count_menu`` counter; any other value leaves the counters untouched.
    A counter that does not exist yet is created with the value 0.
    The module-level shelf ``s`` is opened on demand and synced afterwards.
    Any exception is logged through ``menulog.debug`` and swallowed.
    """
    try:
        if not s:
            globals()['s'] = shelve.open('visit_count.dat', writeback=True)

        if page == 0:
            counter = 'count_home'
        elif page == 1:
            counter = 'count_menu'
        else:
            counter = None

        if counter is not None:
            if s.get(counter) is None:
                s[counter] = 0
            else:
                s[counter] = s[counter] + 1
        s.sync()
    except Exception as e:
        menulog.debug(e)
コード例 #6
0
ファイル: start.py プロジェクト: arcsun/neteaseMenu
def getWebContent(url):
    """Return the page at *url*, served from the cache when possible.

    The cache key is the text between the first ``?`` of *url* and the next
    ``?`` (or the end), with ``=`` replaced by ``_``. On a cache miss the
    page is requested with ``&companyId=1`` appended and a YiXin/Android
    mobile User-Agent, decoded as UTF-8, handed to ``saveCache`` and
    returned.

    Returns ``''`` on any error, including a *url* without ``?``; the error
    is logged through ``menulog.debug``.
    """
    try:
        fname = url.split('?')[1].replace('=', '_')
        # Look the entry up once so the check and the return agree.
        cached = cache.get(fname)
        if cached:
            return cached

        req = urllib2.Request(url + '&companyId=1')  # update: this parameter was added
        req.add_header('User-Agent', 'Mozilla/5.0 (Linux; Android 6.0; PRO 6 Build/MRA58K; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/44.0.2403.130 Mobile Safari/537.36 YiXin/4.8.3')
        res = urllib2.urlopen(req)
        try:
            html = res.read().decode('utf-8')
        finally:
            # Release the connection even if read()/decode() raises.
            res.close()
        saveCache(fname, html)
        return html
    except Exception as e:
        # Best-effort fetch: callers treat '' as "no content".
        menulog.debug(str(e))
        return ''
コード例 #7
0
def getWebContent(url):
    """Fetch *url* through the page cache.

    The key is derived from the segment of *url* after its first ``?``
    (``=`` replaced by ``_``). A truthy cached value is returned directly;
    otherwise the page is downloaded with ``&companyId=1`` appended and a
    YiXin/Android mobile User-Agent, decoded as UTF-8, passed to
    ``saveCache`` and returned.

    Any exception (for instance a *url* without ``?``) is logged through
    ``menulog.debug`` and ``''`` is returned.
    """
    try:
        fname = url.split('?')[1].replace('=', '_')
        # Single lookup: avoids a second call between check and return.
        cached = cache.get(fname)
        if cached:
            return cached

        req = urllib2.Request(url + '&companyId=1')  # update: this parameter was added
        req.add_header(
            'User-Agent',
            'Mozilla/5.0 (Linux; Android 6.0; PRO 6 Build/MRA58K; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/44.0.2403.130 Mobile Safari/537.36 YiXin/4.8.3'
        )
        res = urllib2.urlopen(req)
        try:
            html = res.read().decode('utf-8')
        finally:
            # Always close the response so the socket is not leaked.
            res.close()
        saveCache(fname, html)
        return html
    except Exception as e:
        # Best-effort fetch: an empty string signals "nothing retrieved".
        menulog.debug(str(e))
        return ''
コード例 #8
0
ファイル: start.py プロジェクト: arcsun/neteaseMenu
def getWeekDayFromDay(daytime):
    """Return the Chinese weekday name for a ``yymmdd`` date.

    *daytime* is an int or string such as ``160517``. ``'20'`` is prepended
    to build the four-digit year, so the value is read as 2016-05-17.

    Returns a name from ``u'星期一'`` (Monday) to ``u'星期日'`` (Sunday), or
    ``u''`` after logging when the value cannot be parsed as a valid date.
    """
    weekdaynames = (
        u'星期一',
        u'星期二',
        u'星期三',
        u'星期四',
        u'星期五',
        u'星期六',
        u'星期日',
    )
    try:
        daytime = '20' + str(daytime)   # '20160517'
        year = int(daytime[:4])         # 2016
        month = int(daytime[4:6])       # 5
        day = int(daytime[6:8])         # 17
        # weekday() is 0 for Monday .. 6 for Sunday, matching the tuple order.
        return weekdaynames[datetime(year, month, day).weekday()]
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
        menulog.debug(u'获取星期几错误')
        return u''
コード例 #9
0
def getWeekDayFromDay(daytime):
    """Map a ``yymmdd`` date to its Chinese weekday name.

    *daytime* may be an int or a string, e.g. ``160517``; the century
    ``'20'`` is prepended before parsing, giving 2016-05-17.

    Returns ``u'星期一'`` .. ``u'星期日'`` (Monday .. Sunday). When the value
    is not a valid date the failure is logged and ``u''`` is returned.
    """
    try:
        daytime = '20' + str(daytime)  # '20160517'
        year = int(daytime[:4])  # 2016
        month = int(daytime[4:6])  # 5
        day = int(daytime[6:8])  # 17
        weekday = datetime(year, month, day).weekday()  # 0 = Monday
    except Exception:
        # Narrower than a bare except so KeyboardInterrupt/SystemExit escape.
        menulog.debug(u'获取星期几错误')
        return u''
    # The suffix characters are ordered Monday .. Sunday.
    return u'星期' + u'一二三四五六日'[weekday]
コード例 #10
0
ファイル: bgtask.py プロジェクト: arcsun/neteaseMenu
    def process(self):
        """Run one search round for newly published menu pages.

        Loads the persisted state from the dbm file, probes consecutive page
        ids starting at the stored ``startId`` minus ``self.back``, records
        every page recognised as a menu in ``self.cache`` (date as int ->
        page id), then writes the state and the sorted list of cached dates
        that are >= today back to the db. ``self.running`` is set to False
        when the round ends, whether or not it succeeded.
        """
        self.count += 1
        self.today = int(time.strftime('%y%m%d', time.localtime()))
        menulog.info(u'开始第%d次查找@%d' % (self.count, self.getTime()))

        try:
            # NOTE(review): db is only closed on the success path below; an
            # exception raised in between leaves the handle open.
            db = dbm.open(datafile, 'c')
            if not len(db):
                # No previous data file: seed it with the current values.
                db['startId'] = str(startId)
                db['lastQuery'] = str(self.getTime())
                db['cache'] = str(self.cache)
                db['maybe'] = str(self.maybe)

            # NOTE(review): values are stored as str(...) and restored with
            # eval(); this is only safe while the db file is trusted.
            self.startId = eval(db['startId']) - self.back
            self.cache = eval(db['cache'])
            self.maybe = eval(db['maybe'])
            self.nowId = self.startId

            self.lastQuery = self.getTime()  # record the time of this search

            # startId moves up to every menu found, so the window of
            # ``interval`` ids restarts from the most recent hit.
            while self.nowId - self.startId < self.interval:
                menulog.info(u'开始查找: %d' % self.nowId)
                text = getWebContent(urlhead + str(self.nowId))
                if text.find(u'今日菜单') != -1 and text.find(u'本帮菜') != -1:
                    self.empty = 0
                    try:
                        year = re.findall(pattern_year, text)[0]
                        monthday = re.findall(pattern_month, text)

                        if monthday[0] == '0' and len(monthday) > 2:
                            month = monthday[0] + monthday[1]
                            dayIndex = 2
                        else:
                            month = monthday[0]
                            dayIndex = 1

                        if len(monthday) > dayIndex:
                            day = monthday[dayIndex]
                            if len(day) == 1:
                                # Handles markup like 1</span>...>5日&nbsp
                                # (the month above can be split the same way)
                                day += re.findall(pattern_day2, text)[0]
                        else:
                            day = re.findall(pattern_day, text)[0]

                        update_month = re.findall(pattern_month_update,
                                                  text)[0]  # month the menu was published, used for the year rollover
                        if int(update_month) == 12 and int(month) == 1:
                            year = str(int(year) + 1)
                        thisday = int(year + month + day)

                        self.startId = self.nowId
                        if self.cache.has_key(thisday):
                            menulog.info(u'更新%s的菜单id为%s' %
                                         (thisday, self.nowId))
                        self.cache[thisday] = self.nowId
                        menulog.info('find %d' % self.nowId)
                    except (IndexError, ):
                        # A pattern did not match: remember the id as a
                        # possible menu instead of caching it.
                        if self.nowId not in self.maybe:
                            self.maybe.append(self.nowId)
                            menulog.debug('IndexError add maybe')

                else:
                    if text.find(u'请求素材不存在') == -1:
                        # The page has content (but it is not a menu)
                        self.usedId = self.nowId
                        self.empty = 0
                    else:
                        # Count consecutive empty pages and stop the round
                        # once there are more than self.maxEmpty of them.
                        self.empty += 1
                        menulog.info('empty(%d) %d' % (self.empty, self.nowId))
                        if self.empty > self.maxEmpty:
                            menulog.debug('break this round')
                            break
                self.nowId += 1

            # if self.maybe and max(self.maybe) > max(self.cache.values()):
            #   # This design was dropped: the format changes too much and it could easily get stuck
            # menulog.info(u'更新起点至可能的ID:%d'% max(self.maybe))
            # self.startId = max(self.maybe)
            if self.usedId > self.startId:
                menulog.info(u'更新起点至%d' % self.usedId)
                self.startId = self.usedId

            # Persist the state
            db['startId'] = str(self.startId)
            db['lastQuery'] = str(self.lastQuery)
            db['cache'] = str(self.cache)
            db['maybe'] = str(self.maybe)
            menulog.info(u'第%d次查找结束' % self.count)

            # Menus already found for today or later
            self.cache = eval(db['cache'])
            future = []
            for day in self.cache.keys():
                if day >= self.today:
                    future.append(day)
            future.sort()
            db['future'] = str(future)
            menulog.info(u'更新今后已找到的菜单列表')
            db.close()
        except (IOError, EOFError):
            menulog.info(u'缓存读取/创建异常')
        finally:
            self.running = False
コード例 #11
0
ファイル: bgtask.py プロジェクト: arcsun/neteaseMenu
    def process(self):
        """Run one search round for newly published menu pages.

        Loads the persisted state from the dbm file, probes consecutive page
        ids starting at the stored ``startId`` minus ``self.back``, records
        every page recognised as a menu in ``self.cache`` (date as int ->
        page id), then writes the state and the sorted list of cached dates
        that are >= today back to the db. ``self.running`` is set to False
        when the round ends, whether or not it succeeded.
        """
        self.count += 1
        self.today = int(time.strftime('%y%m%d', time.localtime()))
        menulog.info(u'开始第%d次查找@%d' % (self.count, self.getTime()))

        try:
            # NOTE(review): db is only closed on the success path below.
            db = dbm.open(datafile, 'c')
            if not len(db):
                # No previous data file: seed it with the current values.
                db['startId'] = str(startId)
                db['lastQuery'] = str(self.getTime())
                db['cache'] = str(self.cache)
                db['maybe'] = str(self.maybe)

            # NOTE(review): values are stored as str(...) and restored with
            # eval(); this is only safe while the db file is trusted.
            self.startId = eval(db['startId']) - self.back
            self.cache = eval(db['cache'])
            self.maybe = eval(db['maybe'])
            self.nowId = self.startId
            self.lastQuery = self.getTime()  # record the time of this search

            # startId moves up to every menu found, so the window of
            # ``interval`` ids restarts from the most recent hit.
            while self.nowId - self.startId < self.interval:
                menulog.info(u'开始查找: %d' % self.nowId)
                text = getWebContent(urlhead + str(self.nowId))
                if text.find(u'今日菜单') != -1:
                    self.empty = 0
                    try:
                        year = re.findall(pattern_year, text)[0]
                        monthday = re.findall(pattern_month, text)

                        if monthday[0] == '0' and len(monthday) > 2:
                            month = monthday[0] + monthday[1]
                            dayIndex = 2
                        else:
                            month = monthday[0]
                            dayIndex = 1

                        if len(monthday) > dayIndex:
                            day = monthday[dayIndex]
                            if len(day) == 1:
                                # Handles markup like 1</span>...>5日&nbsp
                                # (the month above can be split the same way)
                                day += re.findall(pattern_day2, text)[0]
                        else:
                            day = re.findall(pattern_day, text)[0]

                        # Month the menu was published, used for the year rollover
                        update_month = re.findall(pattern_month_update, text)[0]
                        if int(update_month) == 12 and int(month) == 1:
                            year = str(int(year) + 1)
                        thisday = int(year + month + day)

                        self.startId = self.nowId
                        if thisday in self.cache:
                            menulog.info(u'更新%s的菜单id为%s' % (thisday, self.nowId))
                        self.cache[thisday] = self.nowId
                        menulog.info('find %d' % self.nowId)
                    except IndexError:
                        # str.find() returns -1 (truthy) when the text is
                        # absent, so the markers are tested with ``in``.
                        if u'祝您用餐愉快' in text and u'农历' in text:
                            menulog.debug('gz menu')
                        elif self.nowId not in self.maybe:
                            self.maybe.append(self.nowId)
                            menulog.debug('IndexError add maybe')

                else:
                    if text.find(u'请求素材不存在') == -1:
                        # The page has content (but it is not a menu)
                        self.usedId = self.nowId
                        self.empty = 0
                    else:
                        # Stop the round after more than 10 empty pages in a row.
                        self.empty += 1
                        if self.empty > 10:
                            menulog.debug('break this round')
                            break
                self.nowId += 1

            # Jumping the start id to max(self.maybe) was dropped: the page
            # format changes too much and the search could easily get stuck.
            if self.usedId > self.startId:
                menulog.info(u'更新起点至%d' % self.usedId)
                self.startId = self.usedId

            # Persist the state
            db['startId'] = str(self.startId)
            db['lastQuery'] = str(self.lastQuery)
            db['cache'] = str(self.cache)
            db['maybe'] = str(self.maybe)
            menulog.info(u'第%d次查找结束' % self.count)

            # Menus already found for today or later
            self.cache = eval(db['cache'])
            future = sorted(day for day in self.cache if day >= self.today)
            db['future'] = str(future)
            menulog.info(u'更新今后已找到的菜单列表')
            db.close()
        except (IOError, EOFError):
            menulog.info(u'缓存读取/创建异常')
        finally:
            self.running = False
コード例 #12
0
    def process(self):
        """Run one search round and persist the result to ``record.pkl``.

        Restores ``startId``, ``lastQuery``, ``cache`` and ``maybe`` from the
        pickle file (rewriting it from the current attributes when it is
        missing or truncated), probes consecutive page ids while
        ``nowId - startId < self.interval`` (``startId`` advances to every
        menu found), moves the start id forward and saves the state.
        ``self.running`` is cleared when the round ends, including when it
        is aborted by an exception.
        """
        def save():
            """Pickle startId, lastQuery, cache and maybe to record.pkl."""
            try:
                # ``with`` closes the file even if a dump fails part-way.
                with open('record.pkl', 'wb') as fs:
                    pickle.dump(self.startId, fs, 0)
                    pickle.dump(self.lastQuery, fs, 0)
                    pickle.dump(self.cache, fs, 0)
                    pickle.dump(self.maybe, fs, 0)
            except IOError:
                menulog.debug(u'保存缓存失败')

        self.count += 1
        menulog.info(u'开始第%d次查找@%d' % (self.count, self.getTime()))

        try:
            try:
                with open('record.pkl', 'rb') as f:
                    self.startId = pickle.load(f)
                    self.nowId = self.startId
                    self.lastQuery = pickle.load(f)  # note: restores the stored value
                    self.cache = pickle.load(f)
                    self.maybe = pickle.load(f)
            except (IOError, EOFError):
                # No cache file, or its content is incomplete
                menulog.info(u'缓存读取异常, 重建')
                save()

            self.lastQuery = self.getTime()  # overwrite with the current time
            while self.nowId - self.startId < self.interval:
                menulog.info(u'开始查找: %d' % self.nowId)
                page = urllib.urlopen(urlhead + str(self.nowId))
                try:
                    text = page.read().decode('utf-8')
                finally:
                    # Release the connection even if read()/decode() raises.
                    page.close()
                if text.find(u'今日菜单') != -1:
                    try:
                        year = re.findall(pattern_year, text)[0]
                        month = re.findall(pattern_month, text)[0]
                        day = re.findall(pattern_day, text)[0]
                        thisday = int(year + month + day)
                        self.startId = self.nowId
                        self.cache[thisday] = self.nowId
                        menulog.info('find %d' % self.nowId)
                    except IndexError:
                        if text.find(u'风味小吃') != -1:
                            # This is the Guangzhou menu: ignore it
                            pass
                        else:
                            if self.nowId not in self.maybe:
                                self.maybe.append(self.nowId)
                            menulog.debug('IndexError')
                else:
                    if text.find(u'请求素材不存在') == -1:
                        # The page has content (but it is not a menu)
                        self.usedId = self.nowId
                self.nowId += 1

            # An empty cache has no maximum; max() would raise ValueError,
            # so any "maybe" id then counts as newer than every known menu.
            if self.maybe and (not self.cache or
                               max(self.maybe) > max(self.cache.values())):
                # e.g. 15956 was published with a broken layout, the used ids
                # then advanced to 16xxx, and 15958 finally became the real menu
                menulog.info(u'更新起点至可能的ID:%d' % max(self.maybe))
                self.startId = max(self.maybe)

            elif self.usedId > self.startId:
                menulog.info(u'更新起点至%d' % self.usedId)
                self.startId = self.usedId

            menulog.info(u'第%d次查找结束' % self.count)
            save()
        finally:
            # Reset the flag even when the round fails (e.g. network error).
            self.running = False
コード例 #13
0
ファイル: bgtask.py プロジェクト: a4963290/neteaseMenu
    def process(self):
        """Search for new menu pages once and store the state in ``record.pkl``.

        The stored ``startId``, ``lastQuery``, ``cache`` and ``maybe`` are
        loaded first; if the file is missing or truncated it is rewritten
        from the current attributes. Page ids are then probed one by one
        while ``nowId - startId < self.interval``, with ``startId`` moving
        to each menu found. Finally the start id is advanced and the state
        is saved. ``self.running`` is cleared on every exit path.
        """
        def save():
            """Dump startId, lastQuery, cache and maybe to record.pkl."""
            try:
                # The context manager closes the file on failure as well.
                with open('record.pkl', 'wb') as fs:
                    pickle.dump(self.startId, fs, 0)
                    pickle.dump(self.lastQuery, fs, 0)
                    pickle.dump(self.cache, fs, 0)
                    pickle.dump(self.maybe, fs, 0)
            except IOError:
                menulog.debug(u'保存缓存失败')

        self.count += 1
        menulog.info(u'开始第%d次查找@%d' % (self.count, self.getTime()))

        try:
            try:
                with open('record.pkl', 'rb') as f:
                    self.startId = pickle.load(f)
                    self.nowId = self.startId
                    self.lastQuery = pickle.load(f)  # note: restores the stored value
                    self.cache = pickle.load(f)
                    self.maybe = pickle.load(f)
            except (IOError, EOFError):
                # No cache file, or its content is incomplete
                menulog.info(u'缓存读取异常, 重建')
                save()

            self.lastQuery = self.getTime()  # overwrite with the current time
            while self.nowId - self.startId < self.interval:
                menulog.info(u'开始查找: %d' % self.nowId)
                page = urllib.urlopen(urlhead + str(self.nowId))
                try:
                    text = page.read().decode('utf-8')
                finally:
                    # Always close the response so the socket is not leaked.
                    page.close()
                if text.find(u'今日菜单') != -1:
                    try:
                        year = re.findall(pattern_year, text)[0]
                        month = re.findall(pattern_month, text)[0]
                        day = re.findall(pattern_day, text)[0]
                        thisday = int(year + month + day)
                        self.startId = self.nowId
                        self.cache[thisday] = self.nowId
                        menulog.info('find %d' % self.nowId)
                    except IndexError:
                        if text.find(u'风味小吃') != -1:
                            # This is the Guangzhou menu: ignore it
                            pass
                        else:
                            if self.nowId not in self.maybe:
                                self.maybe.append(self.nowId)
                            menulog.debug('IndexError')
                else:
                    if text.find(u'请求素材不存在') == -1:
                        # The page has content (but it is not a menu)
                        self.usedId = self.nowId
                self.nowId += 1

            # max() of an empty cache raises ValueError; with no known menu
            # every "maybe" id is treated as the newest candidate.
            if self.maybe and (not self.cache or
                               max(self.maybe) > max(self.cache.values())):
                # e.g. 15956 was published with a broken layout, the used ids
                # then advanced to 16xxx, and 15958 finally became the real menu
                menulog.info(u'更新起点至可能的ID:%d' % max(self.maybe))
                self.startId = max(self.maybe)

            elif self.usedId > self.startId:
                menulog.info(u'更新起点至%d' % self.usedId)
                self.startId = self.usedId

            menulog.info(u'第%d次查找结束' % self.count)
            save()
        finally:
            # Clear the flag even if the round is aborted by an exception.
            self.running = False