def menuList():
    """Render the home page listing the upcoming menus.

    Bumps the home-page counters, reads ``cache``, ``future`` and ``maybe``
    from the ``datafile`` dbm store and renders ``menu.html``.

    Returns:
        The rendered template, or an error message when the store cannot be
        read or a day listed in ``future`` has no entry in ``cache``.
    """
    addOne(0)
    globals()['visitHome'] += 1
    menulog.info(u'访问主页@%s' % visitHome)
    try:
        db = dbm.open('datafile', 'c')
        try:
            # NOTE(security): eval() executes whatever text is stored in the
            # dbm file; acceptable only while this application is the sole
            # writer of that file.
            cache = eval(db['cache'])
            future = eval(db['future'])
            maybe = eval(db['maybe'])
        finally:
            # Always release the store, even when a key is missing.
            db.close()
        maybe.sort()
        vals = {day: cache[day] for day in future}
        weekdays = {day: getWeekDayFromDay(day) for day in vals}
        return render_template(
            'menu.html', vals=vals, days=future, weekdays=weekdays,
            maybe=maybe,
            total=(s.get('count_menu'), s.get('count_home')))
    except (IOError, KeyError):
        msg = u'缓存读取错误'
        menulog.info(msg)
        return msg
def bus():
    """Relay the shuttle-bus route page.

    Increments the visit counter, fetches the remote page on the server side
    and returns its content.

    Returns:
        The fetched page content, or an error message if the fetch fails.
    """
    globals()['visit'] += 1
    menulog.info(u'访问菜单@%s' % visit)
    # This address changes every so often; consider scraping it instead.
    url = "http://numenplus.yixin.im/multiNewsWap.do?multiNewsId=9182"
    try:
        return getWebContent(url)
    except Exception:
        # Catch Exception rather than everything so that SystemExit and
        # KeyboardInterrupt still propagate; record the failure.
        menulog.info(u'bus page fetch failed: %s' % url)
        return u'网页访问出错'
def bus():
    """Relay the shuttle-bus route page.

    Increments the visit counters, fetches the remote page on the server side
    and returns its content.

    Returns:
        The fetched page content, or an error message if the fetch fails.
    """
    addOne(1)
    globals()['visit'] += 1
    menulog.info(u'访问菜单@%s' % visit)
    # The page is updated rarely, so the ID is maintained by hand for now.
    url = "http://numenplus.yixin.im/multiNewsWap.do?multiNewsId=17011"
    try:
        return getWebContent(url)
    except Exception:
        # Catch Exception rather than everything so that SystemExit and
        # KeyboardInterrupt still propagate; record the failure.
        menulog.info(u'bus page fetch failed: %s' % url)
        return u'网页访问出错'
def checkSign(signature, timestamp, nonce):
    """Verify a WeChat server-configuration signature.

    WeChat computes the signature as the SHA-1 hex digest of the module-level
    ``token``, ``timestamp`` and ``nonce`` sorted lexicographically and
    concatenated with no separator. The previous implementation hashed
    ``"nonce=..&timestamp=..&token=.."`` pairs instead, so it could never
    match a genuine request.

    Args:
        signature: The ``signature`` query parameter sent by WeChat.
        timestamp: The ``timestamp`` query parameter.
        nonce: The ``nonce`` query parameter.

    Returns:
        True when the locally computed digest equals ``signature``.
    """
    raw = "".join(sorted([token, timestamp, nonce]))
    if not isinstance(raw, bytes):
        # hashlib needs bytes; the parameters arrive as text.
        raw = raw.encode('utf-8')
    sign = hashlib.sha1(raw).hexdigest()
    menulog.info(signature)
    menulog.info(sign)
    return signature == sign
def clearMaybe():
    """Reset the list of candidate menu IDs (``maybe``) to an empty list.

    Returns:
        A confirmation message, or an error message when the ``datafile``
        dbm store cannot be opened or written.
    """
    try:
        db = dbm.open('datafile', 'w')
        try:
            db['maybe'] = '[]'
        finally:
            # Close the store even if the write fails.
            db.close()
        msg = u'清空maybe'
        menulog.info(msg)
        return msg
    except (IOError, KeyError):
        msg = u'缓存读取错误'
        menulog.info(msg)
        return msg
def menus(day=0):
    """Serve the menu for ``day`` by fetching it on the server side.

    Added to work around in-app navigation hanging inside WeChat: the server
    reads the page from Yixin and hands the content back to the user.

    Args:
        day: Passed to ``menu.Menu``; replaced by the ``day`` form field on
            POST requests.

    Returns:
        The fetched page when ``Menu.process()`` yields a string starting
        with ``http``; otherwise that string itself.
    """
    from codepy import menu

    is_post = request.method == 'POST'
    if is_post:
        day = int(request.form['day'])
    globals()['visit'] += 1
    menulog.info(u'访问菜单@%s' % visit)
    target = menu.Menu(day).process()
    if not target.startswith('http'):
        return target
    return getWebContent(target)
def weixin_sign():
    """Answer the WeChat configuration handshake.

    Reads ``signature``, ``timestamp``, ``nonce`` and ``echostr`` from the
    query string (each defaulting to an empty string), runs ``checkSign`` and
    returns ``echostr``.
    """
    menulog.info('weixin sign')
    query = request.args
    signature = query.get('signature', '')
    timestamp = query.get('timestamp', '')
    nonce = query.get('nonce', '')
    echostr = query.get('echostr', '')
    # The check is still executed, but its verdict is deliberately ignored:
    # signature verification is currently buggy, so every request is treated
    # as successful.
    checkSign(signature, timestamp, nonce)
    return echostr
def menus(day=0):
    """Serve the menu for ``day`` by fetching it on the server side.

    Added to work around in-app navigation hanging inside WeChat: the server
    reads the page from Yixin and hands the content back to the user.

    Args:
        day: Passed to ``menu.Menu``; replaced by the ``day`` form field on
            POST requests.

    Returns:
        The fetched page when ``Menu.process()`` yields a string starting
        with ``http``; otherwise that string itself.
    """
    from codepy import menu

    is_post = request.method == 'POST'
    if is_post:
        day = int(request.form['day'])
    addOne(1)
    globals()['visit'] += 1
    menulog.info(u'访问菜单@%s' % visit)
    target = menu.Menu(day).process()
    if not target.startswith('http'):
        return target
    return getWebContent(target)
def schedule(self):
    """Run ``process`` once immediately, then poll the clock indefinitely.

    On the first call ``firstRun`` is cleared, ``process`` runs and this
    method re-enters itself, which then takes the polling branch. The polling
    loop has no exit condition.
    """
    if self.firstRun:
        self.firstRun = False
        self.process()
        self.schedule()  # could be replaced with Timer().start()
    else:
        while True:
            time.sleep(0.1)  # greatly reduces CPU usage
            if self.getTime() % self.frequency == 0 and not self.running:
                self.running = True
                self.process()
            elif self.getTime() % 3600 == 0:  # log a liveness record every 3600s
                menulog.info('%s@%d'% (time.strftime('20%y-%m-%d %H:%M:%S', time.localtime()), self.getTime()))
                # NOTE(review): indentation was lost in this copy; the sleep
                # is assumed to belong to this branch so the liveness line is
                # not repeated within the same second -- confirm.
                time.sleep(1)
def schedule(self):
    """Run ``process`` once immediately, then poll the clock indefinitely.

    On the first call ``firstRun`` is cleared, ``process`` runs and this
    method re-enters itself, which then takes the polling branch. The polling
    loop has no exit condition.
    """
    if self.firstRun:
        self.firstRun = False
        self.process()
        self.schedule()
    else:
        while True:
            time.sleep(0.1)
            if self.getTime() % self.frequency == 0 and not self.running:
                self.running = True
                # Disabled idea: a time.sleep(1) here. Author's note: even
                # with the flag the trigger can still fire several times; a
                # sleep would guarantee a single trigger.
                self.process()
            elif self.getTime() % 60 == 0:
                menulog.info('%s@%d'% (time.strftime('20%y-%m-%d %H:%M:%S', time.localtime()), self.getTime()))
                # NOTE(review): indentation was lost in this copy; the sleep
                # is assumed to belong to this branch so the log line is not
                # repeated within the same second -- confirm.
                time.sleep(1)
def start(startid=17000):
    """Set the ID from which the menu search starts.

    On POST requests the value is taken from the ``startid`` form field;
    otherwise the default argument is used. The value is stored under
    ``startId`` in the ``datafile`` dbm store.

    Returns:
        A confirmation message, or an error message when the store or the
        POST parameter cannot be read.
    """
    try:
        if request.method == 'POST':
            startid = int(request.form['startid'])
        db = dbm.open('datafile', 'w')
        try:
            db['startId'] = str(startid)
        finally:
            # Close the store even if the write fails.
            db.close()
        msg = u'设置查找起点ID为:%d' % startid
        menulog.info(msg)
        return msg
    except (IOError, KeyError, ValueError):
        # ValueError: the form field was present but not an integer.
        msg = u'缓存/POST参数读取错误'
        menulog.info(msg)
        return msg
def start(startid=17000):
    """Set the ID from which the menu search starts.

    On POST requests the value is taken from the ``startid`` form field;
    otherwise the default argument is used. The value is stored under
    ``startId`` in the ``datafile`` dbm store.

    Returns:
        A confirmation message, or an error message when the store or the
        POST parameter cannot be read.
    """
    try:
        if request.method == 'POST':
            startid = int(request.form['startid'])
        db = dbm.open('datafile', 'w')
        try:
            db['startId'] = str(startid)
        finally:
            # Close the store even if the write fails.
            db.close()
        msg = u'设置查找起点ID为:%d' % startid
        menulog.info(msg)
        return msg
    except (IOError, KeyError, ValueError):
        # ValueError: the form field was present but not an integer.
        msg = u'缓存/POST参数读取错误'
        menulog.info(msg)
        return msg
def delete(day=150101):
    """Remove one day's entry from the stored menu cache.

    Args:
        day: Key to remove; replaced by the ``day`` form field on POST
            requests.

    Returns:
        A message saying whether the key was deleted or not found, or an
        error message when the store or the form field cannot be read.
    """
    try:
        db = dbm.open('datafile', 'w')
        try:
            if request.method == 'POST':
                day = int(request.form['day'])
            # NOTE(security): eval() runs whatever text the dbm file holds;
            # acceptable only while this application is its sole writer.
            cache = eval(db['cache'])
            if day in cache:
                del cache[day]
                msg = u'删除%s' % day
            else:
                msg = u'del key not found'
            menulog.info(msg)
            db['cache'] = str(cache)
        finally:
            # Close the store on every path, including failures.
            db.close()
        return msg
    except (IOError, KeyError):
        return u'缓存读取错误'
def refreshlist():
    """Rebuild the stored ``future`` list from the menu cache.

    ``future`` becomes the sorted list of cache keys that are greater than or
    equal to today's date expressed as the integer ``yymmdd``.

    Returns:
        A confirmation message, or an error message when the store cannot be
        read.
    """
    try:
        db = dbm.open('datafile', 'w')
        try:
            # NOTE(security): eval() runs whatever text the dbm file holds;
            # acceptable only while this application is its sole writer.
            cache = eval(db['cache'])
            today = int(time.strftime('%y%m%d', time.localtime(time.time())))
            future = sorted(day for day in cache if day >= today)
            db['future'] = str(future)
        finally:
            # Close the store on every path, including failures.
            db.close()
        msg = u'更新%s后已找到的菜单列表 from homepage' % today
        menulog.info(msg)
        return msg
    except (IOError, KeyError):
        return u'缓存读取错误'
def refreshlist():
    """Rebuild the stored ``future`` list from the menu cache.

    ``future`` becomes the sorted list of cache keys that are greater than or
    equal to today's date expressed as the integer ``yymmdd``.

    Returns:
        A confirmation message, or an error message when the store cannot be
        read.
    """
    try:
        db = dbm.open('datafile', 'w')
        try:
            # NOTE(security): eval() runs whatever text the dbm file holds;
            # acceptable only while this application is its sole writer.
            cache = eval(db['cache'])
            today = int(time.strftime('%y%m%d', time.localtime(time.time())))
            future = sorted(day for day in cache if day >= today)
            db['future'] = str(future)
        finally:
            # Close the store on every path, including failures.
            db.close()
        msg = u'更新%s后已找到的菜单列表 from homepage' % today
        menulog.info(msg)
        return msg
    except (IOError, KeyError):
        return u'缓存读取错误'
def schedule(self):
    """Run ``process`` once immediately, then poll the clock indefinitely.

    On the first call ``firstRun`` is cleared, ``process`` runs and this
    method re-enters itself, which then takes the polling branch. The polling
    loop has no exit condition.
    """
    if self.firstRun:
        self.firstRun = False
        self.process()
        self.schedule()  # could be replaced with Timer().start()
    else:
        while True:
            time.sleep(0.1)  # greatly reduces CPU usage
            if self.getTime() % self.frequency == 0 and not self.running:
                self.running = True
                self.process()
            elif self.getTime() % 3600 == 0:  # log a liveness record every 3600s
                menulog.info(
                    '%s@%d' % (time.strftime('20%y-%m-%d %H:%M:%S', time.localtime()), self.getTime()))
                # NOTE(review): indentation was lost in this copy; the sleep
                # is assumed to belong to this branch so the liveness line is
                # not repeated within the same second -- confirm.
                time.sleep(1)
def delete(day=150101):
    """Remove one day's entry from the stored menu cache.

    Args:
        day: Key to remove; replaced by the ``day`` form field on POST
            requests.

    Returns:
        A message saying whether the key was deleted or not found, or an
        error message when the store or the form field cannot be read.
    """
    try:
        db = dbm.open('datafile', 'w')
        try:
            if request.method == 'POST':
                day = int(request.form['day'])
            # NOTE(security): eval() runs whatever text the dbm file holds;
            # acceptable only while this application is its sole writer.
            cache = eval(db['cache'])
            if day in cache:
                del cache[day]
                msg = u'删除%s' % day
            else:
                msg = u'del key not found'
            menulog.info(msg)
            db['cache'] = str(cache)
        finally:
            # Close the store on every path, including failures.
            db.close()
        return msg
    except (IOError, KeyError):
        return u'缓存读取错误'
def add(day=151203, mid=17063):
    """Manually map a day to a menu ID in the stored cache.

    Exists because the publisher occasionally enters the wrong date.

    Args:
        day: Cache key; replaced by the ``day`` form field on POST requests.
        mid: Menu ID; replaced by the ``mid`` form field on POST requests.

    Returns:
        A confirmation message, or an error message when the store or the
        POST parameters cannot be read.
    """
    try:
        db = dbm.open('datafile', 'w')
        try:
            # NOTE(security): eval() runs whatever text the dbm file holds;
            # acceptable only while this application is its sole writer.
            cache = eval(db['cache'])
            if request.method == 'POST':
                day = int(request.form['day'])
                mid = int(request.form['mid'])
            cache[day] = mid
            db['cache'] = str(cache)
        finally:
            # Close the store on every path, including failures.
            db.close()
        msg = u'更新%s的菜单id为%s' % (day, mid)
        menulog.info(msg)
        return msg
    except (IOError, KeyError, ValueError):
        # ValueError: a form field was present but not an integer.
        msg = u'缓存/POST参数读取错误'
        menulog.info(msg)
        return msg
def schedule(self):
    """Run ``process`` once immediately, then poll the clock indefinitely.

    On the first call ``firstRun`` is cleared, ``process`` runs and this
    method re-enters itself, which then takes the polling branch. The polling
    loop has no exit condition.
    """
    if self.firstRun:
        self.firstRun = False
        self.process()
        self.schedule()
    else:
        while True:
            time.sleep(0.1)
            if self.getTime() % self.frequency == 0 and not self.running:
                self.running = True
                # Disabled idea: a time.sleep(1) here. Author's note: even
                # with the flag the trigger can still fire several times; a
                # sleep would guarantee a single trigger.
                self.process()
            elif self.getTime() % 60 == 0:
                menulog.info(
                    '%s@%d' % (time.strftime('20%y-%m-%d %H:%M:%S', time.localtime()), self.getTime()))
                # NOTE(review): indentation was lost in this copy; the sleep
                # is assumed to belong to this branch so the log line is not
                # repeated within the same second -- confirm.
                time.sleep(1)
def add(day=151203, mid=17063):
    """Manually map a day to a menu ID in the stored cache.

    Exists because the publisher occasionally enters the wrong date.

    Args:
        day: Cache key; replaced by the ``day`` form field on POST requests.
        mid: Menu ID; replaced by the ``mid`` form field on POST requests.

    Returns:
        A confirmation message, or an error message when the store or the
        POST parameters cannot be read.
    """
    try:
        db = dbm.open('datafile', 'w')
        try:
            # NOTE(security): eval() runs whatever text the dbm file holds;
            # acceptable only while this application is its sole writer.
            cache = eval(db['cache'])
            if request.method == 'POST':
                day = int(request.form['day'])
                mid = int(request.form['mid'])
            cache[day] = mid
            db['cache'] = str(cache)
        finally:
            # Close the store on every path, including failures.
            db.close()
        msg = u'更新%s的菜单id为%s' % (day, mid)
        menulog.info(msg)
        return msg
    except (IOError, KeyError, ValueError):
        # ValueError: a form field was present but not an integer.
        msg = u'缓存/POST参数读取错误'
        menulog.info(msg)
        return msg
def delfuture(day=161300):
    """Remove a day from the stored ``future`` list and from the cache.

    Args:
        day: Day to remove; replaced by the ``day`` form field on POST
            requests.

    Returns:
        A message saying whether the day was removed from ``future`` or not
        found, or an error message when the store or form field cannot be
        read.
    """
    try:
        db = dbm.open('datafile', 'w')
        try:
            if request.method == 'POST':
                day = int(request.form['day'])
            # NOTE(security): eval() runs whatever text the dbm file holds;
            # acceptable only while this application is its sole writer.
            future = eval(db['future'])
            if day in future:
                future.remove(day)
                msg = u'删除%s' % day
            else:
                msg = u'del key not found'
            menulog.info(msg)
            db['future'] = str(future)
        finally:
            # Must be closed before delete() reopens the same file.
            db.close()
        delete(day)
        return msg
    except (IOError, KeyError) as e:
        # Report through the application log instead of stdout.
        menulog.info(u'delfuture failed: %r' % (e,))
        return u'缓存读取错误'
def menuList():
    """Render the home page listing the upcoming menus.

    Bumps the home-page visit counter, reads ``cache`` and ``future`` from
    the ``datafile`` dbm store and renders ``menu.html``.

    Returns:
        The rendered template, or an error message when the store cannot be
        read or a day listed in ``future`` has no entry in ``cache``.
    """
    globals()['visitHome'] += 1
    menulog.info(u'访问主页@%s' % visitHome)
    try:
        db = dbm.open('datafile', 'c')
        try:
            # NOTE(security): eval() runs whatever text the dbm file holds;
            # acceptable only while this application is its sole writer.
            cache = eval(db['cache'])
            future = eval(db['future'])
        finally:
            # Always release the store, even when a key is missing.
            db.close()
        vals = {day: cache[day] for day in future}
        weekdays = {day: getWeekDayFromDay(day) for day in vals}
        return render_template('menu.html', vals=vals, days=future,
                               weekdays=weekdays)
    except (IOError, KeyError):
        msg = u'缓存读取错误'
        menulog.info(msg)
        return msg
def delfuture(day=161300):
    """Remove a day from the stored ``future`` list and from the cache.

    Args:
        day: Day to remove; replaced by the ``day`` form field on POST
            requests.

    Returns:
        A message saying whether the day was removed from ``future`` or not
        found, or an error message when the store or form field cannot be
        read.
    """
    try:
        db = dbm.open('datafile', 'w')
        try:
            if request.method == 'POST':
                day = int(request.form['day'])
            # NOTE(security): eval() runs whatever text the dbm file holds;
            # acceptable only while this application is its sole writer.
            future = eval(db['future'])
            if day in future:
                future.remove(day)
                msg = u'删除%s' % day
            else:
                msg = u'del key not found'
            menulog.info(msg)
            db['future'] = str(future)
        finally:
            # Must be closed before delete() reopens the same file.
            db.close()
        delete(day)
        return msg
    except (IOError, KeyError) as e:
        # Report through the application log instead of stdout.
        menulog.info(u'delfuture failed: %r' % (e,))
        return u'缓存读取错误'
def start(startid=15900):
    """Reset the search start ID stored in ``record.pkl``.

    The file holds four consecutive pickles (start ID, last query time,
    cache, maybe). All four are read, then written back with the start ID
    replaced by ``startid``.

    Returns:
        A confirmation message, or an error message when the file is missing
        or holds fewer than four pickles.
    """
    try:
        with open('record.pkl', 'rb') as f:
            pickle.load(f)  # old start ID: read only to advance the stream
            lastQuery = pickle.load(f)
            cache = pickle.load(f)
            maybe = pickle.load(f)
        with open('record.pkl', 'wb') as f:
            pickle.dump(startid, f, 0)
            pickle.dump(lastQuery, f, 0)
            pickle.dump(cache, f, 0)
            pickle.dump(maybe, f, 0)
        msg = u'设置查找起点ID为:%d' % startid
        menulog.info(msg)
        return msg
    except (IOError, EOFError):
        msg = u'缓存读取错误'
        menulog.info(msg)
        return msg
def clearMaybe():
    """Empty the ``maybe`` list stored in ``record.pkl``.

    The file holds four consecutive pickles (start ID, last query time,
    cache, maybe). All four are read, then written back with ``maybe``
    replaced by an empty list.

    Returns:
        A confirmation message, or an error message when the file is missing
        or holds fewer than four pickles.
    """
    try:
        with open('record.pkl', 'rb') as f:
            startId = pickle.load(f)
            lastQuery = pickle.load(f)
            cache = pickle.load(f)
            pickle.load(f)  # old maybe list: read only to validate the file
        with open('record.pkl', 'wb') as f:
            pickle.dump(startId, f, 0)
            pickle.dump(lastQuery, f, 0)
            pickle.dump(cache, f, 0)
            pickle.dump([], f, 0)
        msg = u'清空maybe'
        menulog.info(msg)
        return msg
    except (IOError, EOFError):
        msg = u'缓存读取错误'
        menulog.info(msg)
        return msg
def start(startid=15900):
    """Reset the search start ID stored in ``record.pkl``.

    The file holds four consecutive pickles (start ID, last query time,
    cache, maybe). All four are read, then written back with the start ID
    replaced by ``startid``.

    Returns:
        A confirmation message, or an error message when the file is missing
        or holds fewer than four pickles.
    """
    try:
        with open('record.pkl', 'rb') as f:
            pickle.load(f)  # old start ID: read only to advance the stream
            lastQuery = pickle.load(f)
            cache = pickle.load(f)
            maybe = pickle.load(f)
        with open('record.pkl', 'wb') as f:
            pickle.dump(startid, f, 0)
            pickle.dump(lastQuery, f, 0)
            pickle.dump(cache, f, 0)
            pickle.dump(maybe, f, 0)
        msg = u'设置查找起点ID为:%d' % startid
        menulog.info(msg)
        return msg
    except (IOError, EOFError):
        msg = u'缓存读取错误'
        menulog.info(msg)
        return msg
def process(self):
    """Run one search round over consecutive page IDs.

    Loads the saved state from ``record.pkl``, fetches ``urlhead + id`` for
    each ID until ``self.interval`` IDs past ``self.startId`` have been
    tried, records menu pages in ``self.cache`` and unparseable ones in
    ``self.maybe``, moves ``self.startId`` forward and saves the state.
    ``self.running`` is cleared on every exit path so the scheduler can
    trigger the next round even after a failed fetch.
    """
    def save():
        # Persist the four state values in the order they are read back.
        try:
            with open('record.pkl', 'wb') as fs:
                pickle.dump(self.startId, fs, 0)
                pickle.dump(self.lastQuery, fs, 0)
                pickle.dump(self.cache, fs, 0)
                pickle.dump(self.maybe, fs, 0)
        except IOError:
            menulog.debug(u'保存缓存失败')

    self.count += 1
    menulog.info(u'开始第%d次查找@%d' % (self.count, self.getTime()))
    try:
        try:
            with open('record.pkl', 'rb') as f:
                self.startId = pickle.load(f)
                self.nowId = self.startId
                self.lastQuery = pickle.load(f)  # stored value; replaced below
                self.cache = pickle.load(f)
                self.maybe = pickle.load(f)
        except (IOError, EOFError):
            # No cache file, or its content is incomplete: rebuild it.
            menulog.info(u'缓存读取异常, 重建')
            save()
        self.lastQuery = self.getTime()  # overwrite with the current time
        while self.nowId - self.startId < self.interval:
            menulog.info(u'开始查找: %d' % self.nowId)
            page = urllib.urlopen(urlhead + str(self.nowId))
            try:
                text = page.read().decode('utf-8')
            finally:
                page.close()
            if text.find(u'今日菜单') != -1:
                try:
                    year = re.findall(pattern_year, text)[0]
                    month = re.findall(pattern_month, text)[0]
                    day = re.findall(pattern_day, text)[0]
                    thisday = int(year + month + day)
                    # Finding a menu moves the search window forward.
                    self.startId = self.nowId
                    self.cache[thisday] = self.nowId
                    menulog.info('find %d' % self.nowId)
                except IndexError:
                    if text.find(u'风味小吃') != -1:
                        # This is the Guangzhou menu; ignore it.
                        pass
                    else:
                        if self.nowId not in self.maybe:
                            self.maybe.append(self.nowId)
                        # NOTE(review): indentation was lost in this copy;
                        # confirm this log belongs at this level.
                        menulog.debug('IndexError')
            else:
                if text.find(u'请求素材不存在') == -1:
                    # The page has content but is not a menu.
                    self.usedId = self.nowId
            self.nowId += 1
        # With an empty cache there is no menu ID to compare against; use 0
        # so max() is never called on an empty sequence.
        newest_menu = max(self.cache.values()) if self.cache else 0
        if self.maybe and max(self.maybe) > newest_menu:
            # Example: 15956 was first published with a broken layout, used
            # IDs then advanced to 16xxx, and finally 15958 was replaced by
            # the correct menu.
            menulog.info(u'更新起点至可能的ID:%d' % max(self.maybe))
            self.startId = max(self.maybe)
        elif self.usedId > self.startId:
            menulog.info(u'更新起点至%d' % self.usedId)
            self.startId = self.usedId
        menulog.info(u'第%d次查找结束' % self.count)
        save()
    finally:
        self.running = False
def process(self):
    """Run one search round over consecutive page IDs.

    Loads the saved state from ``record.pkl``, fetches ``urlhead + id`` for
    each ID until ``self.interval`` IDs past ``self.startId`` have been
    tried, records menu pages in ``self.cache`` and unparseable ones in
    ``self.maybe``, moves ``self.startId`` forward and saves the state.
    ``self.running`` is cleared on every exit path so the scheduler can
    trigger the next round even after a failed fetch.
    """
    def save():
        # Persist the four state values in the order they are read back.
        try:
            with open('record.pkl', 'wb') as fs:
                pickle.dump(self.startId, fs, 0)
                pickle.dump(self.lastQuery, fs, 0)
                pickle.dump(self.cache, fs, 0)
                pickle.dump(self.maybe, fs, 0)
        except IOError:
            menulog.debug(u'保存缓存失败')

    self.count += 1
    menulog.info(u'开始第%d次查找@%d' % (self.count, self.getTime()))
    try:
        try:
            with open('record.pkl', 'rb') as f:
                self.startId = pickle.load(f)
                self.nowId = self.startId
                self.lastQuery = pickle.load(f)  # stored value; replaced below
                self.cache = pickle.load(f)
                self.maybe = pickle.load(f)
        except (IOError, EOFError):
            # No cache file, or its content is incomplete: rebuild it.
            menulog.info(u'缓存读取异常, 重建')
            save()
        self.lastQuery = self.getTime()  # overwrite with the current time
        while self.nowId - self.startId < self.interval:
            menulog.info(u'开始查找: %d' % self.nowId)
            page = urllib.urlopen(urlhead + str(self.nowId))
            try:
                text = page.read().decode('utf-8')
            finally:
                page.close()
            if text.find(u'今日菜单') != -1:
                try:
                    year = re.findall(pattern_year, text)[0]
                    month = re.findall(pattern_month, text)[0]
                    day = re.findall(pattern_day, text)[0]
                    thisday = int(year + month + day)
                    # Finding a menu moves the search window forward.
                    self.startId = self.nowId
                    self.cache[thisday] = self.nowId
                    menulog.info('find %d' % self.nowId)
                except IndexError:
                    if text.find(u'风味小吃') != -1:
                        # This is the Guangzhou menu; ignore it.
                        pass
                    else:
                        if self.nowId not in self.maybe:
                            self.maybe.append(self.nowId)
                        # NOTE(review): indentation was lost in this copy;
                        # confirm this log belongs at this level.
                        menulog.debug('IndexError')
            else:
                if text.find(u'请求素材不存在') == -1:
                    # The page has content but is not a menu.
                    self.usedId = self.nowId
            self.nowId += 1
        # With an empty cache there is no menu ID to compare against; use 0
        # so max() is never called on an empty sequence.
        newest_menu = max(self.cache.values()) if self.cache else 0
        if self.maybe and max(self.maybe) > newest_menu:
            # Example: 15956 was first published with a broken layout, used
            # IDs then advanced to 16xxx, and finally 15958 was replaced by
            # the correct menu.
            menulog.info(u'更新起点至可能的ID:%d' % max(self.maybe))
            self.startId = max(self.maybe)
        elif self.usedId > self.startId:
            menulog.info(u'更新起点至%d' % self.usedId)
            self.startId = self.usedId
        menulog.info(u'第%d次查找结束' % self.count)
        save()
    finally:
        self.running = False
def process(self):
    """Run one search round over consecutive page IDs.

    Loads state from the dbm store named by ``datafile`` (seeding it when it
    is empty), fetches ``urlhead + id`` for each ID starting ``self.back``
    before the stored start ID, records menu pages in ``self.cache`` and
    unparseable ones in ``self.maybe``, then writes the state and the sorted
    list of cached days from today onwards (``future``) back to the store.
    The round stops after ``self.interval`` IDs past ``self.startId`` or
    after more than ``self.maxEmpty`` consecutive missing pages.
    ``self.running`` is always cleared at the end.
    """
    self.count += 1
    self.today = int(time.strftime('%y%m%d', time.localtime()))
    menulog.info(u'开始第%d次查找@%d' % (self.count, self.getTime()))
    try:
        db = dbm.open(datafile, 'c')
        try:
            if not len(db):  # no previous data file: seed it
                db['startId'] = str(startId)
                db['lastQuery'] = str(self.getTime())
                db['cache'] = str(self.cache)
                db['maybe'] = str(self.maybe)
            # NOTE(security): eval() runs whatever text the dbm file holds;
            # acceptable only while this application is its sole writer.
            self.startId = eval(db['startId']) - self.back
            self.cache = eval(db['cache'])
            self.maybe = eval(db['maybe'])
            self.nowId = self.startId
            self.lastQuery = self.getTime()  # time of this search
            while self.nowId - self.startId < self.interval:
                menulog.info(u'开始查找: %d' % self.nowId)
                text = getWebContent(urlhead + str(self.nowId))
                if text.find(u'今日菜单') != -1 and text.find(u'本帮菜') != -1:
                    self.empty = 0
                    try:
                        year = re.findall(pattern_year, text)[0]
                        monthday = re.findall(pattern_month, text)
                        if monthday[0] == '0' and len(monthday) > 2:
                            # The month's two digits were matched separately.
                            month = monthday[0] + monthday[1]
                            dayIndex = 2
                        else:
                            month = monthday[0]
                            dayIndex = 1
                        if len(monthday) > dayIndex:
                            day = monthday[dayIndex]
                            if len(day) == 1:
                                # Handles markup such as "1</span>...>5日"
                                # where the day's digits are split (the month
                                # above can be split the same way).
                                day += re.findall(pattern_day2, text)[0]
                        else:
                            day = re.findall(pattern_day, text)[0]
                        # Month in which the menu was published; used to
                        # detect a December post for a January menu.
                        update_month = re.findall(pattern_month_update, text)[0]
                        if int(update_month) == 12 and int(month) == 1:
                            year = str(int(year) + 1)
                        thisday = int(year + month + day)
                        # Finding a menu moves the search window forward.
                        self.startId = self.nowId
                        if thisday in self.cache:
                            menulog.info(u'更新%s的菜单id为%s' % (thisday, self.nowId))
                        self.cache[thisday] = self.nowId
                        menulog.info('find %d' % self.nowId)
                    except IndexError:
                        if self.nowId not in self.maybe:
                            self.maybe.append(self.nowId)
                            menulog.debug('IndexError add maybe')
                else:
                    if text.find(u'请求素材不存在') == -1:
                        # The page has content but is not a menu.
                        self.usedId = self.nowId
                        self.empty = 0
                    else:
                        self.empty += 1
                        menulog.info('empty(%d) %d' % (self.empty, self.nowId))
                        if self.empty > self.maxEmpty:
                            menulog.debug('break this round')
                            break
                self.nowId += 1
            # The former rule that moved the start to max(self.maybe) was
            # dropped: page formats vary too much and it could get stuck.
            if self.usedId > self.startId:
                menulog.info(u'更新起点至%d' % self.usedId)
                self.startId = self.usedId
            # Save the state.
            db['startId'] = str(self.startId)
            db['lastQuery'] = str(self.lastQuery)
            db['cache'] = str(self.cache)
            db['maybe'] = str(self.maybe)
            menulog.info(u'第%d次查找结束' % self.count)
            # Publish the days from today onwards that have a menu.
            future = sorted(day for day in self.cache if day >= self.today)
            db['future'] = str(future)
            menulog.info(u'更新今后已找到的菜单列表')
        finally:
            # Close the store on every path, including failures.
            db.close()
    except (IOError, EOFError):
        menulog.info(u'缓存读取/创建异常')
    finally:
        self.running = False
def process(self):
    """Run one search round over consecutive page IDs.

    Loads state from the dbm store named by ``datafile`` (seeding it when it
    is empty), fetches ``urlhead + id`` for each ID starting ``self.back``
    before the stored start ID, records menu pages in ``self.cache`` and
    unparseable ones in ``self.maybe``, then writes the state and the sorted
    list of cached days from today onwards (``future``) back to the store.
    The round stops after ``self.interval`` IDs past ``self.startId`` or
    after more than 10 consecutive missing pages. ``self.running`` is always
    cleared at the end.
    """
    self.count += 1
    self.today = int(time.strftime('%y%m%d', time.localtime()))
    menulog.info(u'开始第%d次查找@%d' % (self.count, self.getTime()))
    try:
        db = dbm.open(datafile, 'c')
        try:
            if not len(db):  # no previous data file: seed it
                db['startId'] = str(startId)
                db['lastQuery'] = str(self.getTime())
                db['cache'] = str(self.cache)
                db['maybe'] = str(self.maybe)
            # NOTE(security): eval() runs whatever text the dbm file holds;
            # acceptable only while this application is its sole writer.
            self.startId = eval(db['startId']) - self.back
            self.cache = eval(db['cache'])
            self.maybe = eval(db['maybe'])
            self.nowId = self.startId
            self.lastQuery = self.getTime()  # time of this search
            while self.nowId - self.startId < self.interval:
                menulog.info(u'开始查找: %d' % self.nowId)
                text = getWebContent(urlhead + str(self.nowId))
                if text.find(u'今日菜单') != -1:
                    self.empty = 0
                    try:
                        year = re.findall(pattern_year, text)[0]
                        monthday = re.findall(pattern_month, text)
                        if monthday[0] == '0' and len(monthday) > 2:
                            # The month's two digits were matched separately.
                            month = monthday[0] + monthday[1]
                            dayIndex = 2
                        else:
                            month = monthday[0]
                            dayIndex = 1
                        if len(monthday) > dayIndex:
                            day = monthday[dayIndex]
                            if len(day) == 1:
                                # Handles markup such as "1</span>...>5日"
                                # where the day's digits are split (the month
                                # above can be split the same way).
                                day += re.findall(pattern_day2, text)[0]
                        else:
                            day = re.findall(pattern_day, text)[0]
                        # Month in which the menu was published; used to
                        # detect a December post for a January menu.
                        update_month = re.findall(pattern_month_update, text)[0]
                        if int(update_month) == 12 and int(month) == 1:
                            year = str(int(year) + 1)
                        thisday = int(year + month + day)
                        # Finding a menu moves the search window forward.
                        self.startId = self.nowId
                        if thisday in self.cache:
                            menulog.info(u'更新%s的菜单id为%s' % (thisday, self.nowId))
                        self.cache[thisday] = self.nowId
                        menulog.info('find %d' % self.nowId)
                    except IndexError:
                        # str.find() returns -1 (truthy) when the text is
                        # absent, so use membership tests to detect the
                        # Guangzhou menu.
                        if u'祝您用餐愉快' in text and u'农历' in text:
                            menulog.debug('gz menu')
                        elif self.nowId not in self.maybe:
                            self.maybe.append(self.nowId)
                            menulog.debug('IndexError add maybe')
                else:
                    if text.find(u'请求素材不存在') == -1:
                        # The page has content but is not a menu.
                        self.usedId = self.nowId
                        self.empty = 0
                    else:
                        self.empty += 1
                        if self.empty > 10:
                            menulog.debug('break this round')
                            break
                self.nowId += 1
            # The former rule that moved the start to max(self.maybe) was
            # dropped: page formats vary too much and it could get stuck.
            if self.usedId > self.startId:
                menulog.info(u'更新起点至%d' % self.usedId)
                self.startId = self.usedId
            # Save the state.
            db['startId'] = str(self.startId)
            db['lastQuery'] = str(self.lastQuery)
            db['cache'] = str(self.cache)
            db['maybe'] = str(self.maybe)
            menulog.info(u'第%d次查找结束' % self.count)
            # Publish the days from today onwards that have a menu.
            future = sorted(day for day in self.cache if day >= self.today)
            db['future'] = str(future)
            menulog.info(u'更新今后已找到的菜单列表')
        finally:
            # Close the store on every path, including failures.
            db.close()
    except (IOError, EOFError):
        menulog.info(u'缓存读取/创建异常')
    finally:
        self.running = False