def countRoomsForRecentDays(self, query, city, days):
    """
    Count the distinct rooms whose record was updated within the last ``days`` days.

    :param query: the query string to filter on; takes precedence over ``city``
    :param city: the city name specified in config.json, this will take
        effect only when ``query`` is empty
    :param days: the number of days before now that the time window covers
    :return: the number of distinct room ids matched, or 0 when both
        ``query`` and ``city`` are empty or the lookup fails
    """
    ret = 0
    cursor = self._conn.cursor()
    try:
        if query:
            sql = ("SELECT COUNT(*) FROM (SELECT DISTINCT room_id FROM room_info "
                   "WHERE update_time>? AND query_str=?)")
            value = query
        elif city:
            sql = ("SELECT COUNT(*) FROM (SELECT DISTINCT room_id FROM room_info "
                   "WHERE update_time>? AND city=?)")
            value = city
        else:
            Log.w(StorageService._TAG, "query and city all empty!!")
            return 0
        since = getDeltaTimeStamp(datetime.datetime.now(), -days)
        # let SQLite do the counting instead of materializing every row in Python
        cursor.execute(sql, (since, value))
        ret = cursor.fetchone()[0]
    except Exception as e:
        # Exception (not BaseException) so SystemExit/KeyboardInterrupt still propagate
        Log.w(StorageService._TAG, "countRoomsForRecentDays() failed", e)
    finally:
        cursor.close()
        self._conn.commit()
    return ret
def testServer():
    """
    Run a single query round against the example configuration.

    Working files (log file and sqlite storage) are placed under
    ``$HOME/airbnb`` (or ``./airbnb`` when ``$HOME`` is not set). The check-in
    date is one week from now and the check-out dates are 1, 2 and 3 days
    after it.
    """
    date_pattern = '{:%Y-%m-%d}'

    # resolve the working directory
    home_dir = os.environ.get('HOME')
    if not home_dir:
        print('can not fetch $HOME')
        home_dir = "."
    work_dir = home_dir + "/airbnb"
    if not os.path.exists(work_dir):
        os.mkdir(work_dir, mode=0o755)
    log_file = "/".join([work_dir, "all_logs.log"])
    db_file = "/".join([work_dir, "rooms.db"])

    config = parseConfigFile("config_example.json")
    if not config:
        sys.exit(0)
    config.localStoragePath = db_file

    # configure the logging module
    Log.config(log_file, echo=True)

    # initialize the service
    service = SearchService(config)

    # prepare the check-in / check-out dates
    checkin_day = datetime.datetime.now() + datetime.timedelta(weeks=1)
    time_title = date_pattern.format(checkin_day)
    checkin_date = date_pattern.format(checkin_day)
    # 2020.03.22: try several lengths of stay (1, 2 and 3 days) in one go,
    # because some listings only show up for stays longer than one day
    checkout_dates = []
    for nights in (1, 2, 3):
        checkout_day = checkin_day + datetime.timedelta(days=nights)
        checkout_dates.append(date_pattern.format(checkout_day))

    # launch the query
    service.doQuery(checkin_date, checkout_dates, time_title)
def daemon_start(pid_file):
    """
    Detach the current process into the background.

    The process forks once. The parent waits up to 5 seconds for a signal
    from the child and then exits: SIGTERM from the child means success
    (exit code 0), SIGINT means failure (exit code 1). The child records its
    pid in ``pid_file``, starts a new session, ignores SIGHUP and closes
    stdin; only the child returns from this function.

    :param pid_file: path of the pid file the child writes its pid to
    :return: None (in the child process only)
    """
    def handle_exit(signum, _):
        # SIGTERM is the "everything is fine" signal, anything else is a failure
        if signum == signal.SIGTERM:
            sys.exit(0)
        sys.exit(1)

    signal.signal(signal.SIGINT, handle_exit)
    signal.signal(signal.SIGTERM, handle_exit)

    # fork only once because we are sure parent will exit
    # (os.fork() raises OSError on failure, it never returns -1)
    pid = os.fork()

    if pid > 0:
        # parent waits for its child
        time.sleep(5)
        sys.exit(0)

    # child signals its parent to exit
    ppid = os.getppid()
    pid = os.getpid()
    if write_pid_file(pid_file, pid) != 0:
        Log.e(MAIN_TAG, "write_pid_file() failed, pid = " + str(pid))
        os.kill(ppid, signal.SIGINT)
        sys.exit(1)

    os.setsid()
    # signal.signal() takes (signalnum, handler): ignore SIGHUP from now on
    signal.signal(signal.SIGHUP, signal.SIG_IGN)

    Log.d(MAIN_TAG, 'started')
    os.kill(ppid, signal.SIGTERM)
    sys.stdin.close()
def _sendWithAttachment(self, receivers, title, content, attachment=None):
    """
    Send a plain-text mail, optionally carrying one file attachment.

    :param receivers: list of recipient addresses; the first one is used for
        the ``To`` header, all of them are passed to the SMTP server
    :param title: the mail subject
    :param content: the plain-text mail body
    :param attachment: path of the file to attach; when empty the mail is
        sent without an attachment
    :return: None; SMTP errors are logged, not raised
    """
    # build a multipart message holding the body text and the attachment
    message = MIMEMultipart()
    message['From'] = self.sender
    message['To'] = receivers[0]
    message['Subject'] = title
    message.attach(MIMEText(content, 'plain', 'utf-8'))

    if attachment:
        # read through a context manager so the file handle is always released
        with open(attachment, 'rb') as attachment_file:
            part = MIMEApplication(attachment_file.read())
        part.add_header('Content-Disposition', 'attachment', filename=attachment)
        message.attach(part)

    # log in and send
    try:
        smtpObj = smtplib.SMTP_SSL() if self.sendType == "ssl" else smtplib.SMTP()
        # the context manager closes the connection even when a step below fails
        with smtpObj:
            smtpObj.connect(self.mailHost, self.mailPort)
            smtpObj.ehlo()
            if self.sendType == "tls":
                smtpObj.starttls()
            smtpObj.login(self.sender, self.passwd)
            smtpObj.sendmail(self.sender, receivers, message.as_string())
            Log.i(MailReporter.TAG, 'Mail successfully sent to ' + receivers[0])
            smtpObj.quit()
    except smtplib.SMTPException as e:
        Log.e(MailReporter.TAG, 'send() error', e)
def tryGetHomeInfo(self, checkin_date, checkout_date, homeInfoCollection, adults, city):
    """
    The kernel logic for getting home information through the home API.

    Pages through the search results for one city / date range / guest
    count and lets ``retrieveHomeData`` add every room found to
    ``homeInfoCollection``. Failures are logged and end the paging.

    :param checkin_date: the check-in date passed to the API
    :param checkout_date: the check-out date passed to the API
    :param homeInfoCollection: collection that receives the rooms found
    :param adults: the number of adults passed to the API
    :param city: the city entry the query string is built from
    :return: None
    """
    api = Api(randomize=False)
    query = getQueryStr(city)
    hasNextPage = True
    startOffset = 0
    sectionOffset = 0
    try:
        while hasNextPage:
            homes = api.get_homes(query=query,
                                  checkin=checkin_date,
                                  checkout=checkout_date,
                                  offset=startOffset,
                                  sectionOffset=sectionOffset,
                                  items_per_grid=50,
                                  adults=adults)
            pagination = self.retrieveHomeData(query, homes, homeInfoCollection)
            if not pagination:
                hasNextPage = False
            else:
                print("paging for next...")
                hasNextPage = pagination["hasNextPage"]
                startOffset = pagination.get("itemsOffset")
                sectionOffset = pagination.get("sectionOffset")
            time.sleep(self._queryIntervalInSec)  # do not query too fast
    except Exception as e:
        # Exception (not BaseException): SystemExit / KeyboardInterrupt must
        # still be able to stop the process while a query is running
        Log.w(SearchService.TAG, "get_homes() failed, ",
              str(e) + "\r\nexception detail:" + traceback.format_exc())
def _sendText(self, receivers, title, content):
    """
    Send a plain-text mail without attachment.

    :param receivers: list of recipient addresses; the first one is used for
        the ``To`` header, all of them are passed to the SMTP server
    :param title: the mail subject
    :param content: the plain-text mail body
    :return: None; SMTP errors are logged, not raised
    """
    # build the message: body, subject, sender and receiver
    message = MIMEText(content, 'plain', 'utf-8')
    message['Subject'] = title
    message['From'] = self.sender
    message['To'] = receivers[0]

    try:
        smtpObj = smtplib.SMTP_SSL() if self.sendType == "ssl" else smtplib.SMTP()
        # the context manager closes the connection even when a step below fails
        with smtpObj:
            # connect to the server
            smtpObj.connect(self.mailHost, self.mailPort)
            smtpObj.ehlo()
            if self.sendType == "tls":
                smtpObj.starttls()
            # log in
            smtpObj.login(self.sender, self.passwd)
            # send
            smtpObj.sendmail(self.sender, receivers, message.as_string())
            # quit
            smtpObj.quit()
        Log.i(MailReporter.TAG, 'Mail successfully sent to ' + receivers[0])
    except smtplib.SMTPException as e:
        Log.e(MailReporter.TAG, 'send() error', e)
def main():
    """
    Command line entry point.

    Parses the options, configures logging, optionally starts or stops the
    background instance (``-d start`` / ``-d stop``) and finally runs the
    search service with the given config file.

    Options: ``-h/--help``, ``-v/--version``, ``-c/--config-file PATH``,
    ``-l/--log-file PATH``, ``-s/--storage-file PATH``, ``-d start|stop``.
    """
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    configPath = None
    parentPath = os.environ.get('HOME')
    if not parentPath:
        print('can not fetch $HOME')
        parentPath = "."
    parentPath += "/airbnb"
    if not os.path.exists(parentPath):
        os.mkdir(parentPath, mode=0o755)
    pidFilePath = parentPath + "/" + PID_FILE_NAME
    logPath = parentPath + "/all_logs.log"
    storagePath = parentPath + "/rooms.db"

    # short options; -h and -v are flags and therefore carry no trailing ':'
    shortopts = 'hc:d:l:s:v'
    # long options
    longopts = [
        'help', 'storage-file=', 'log-file=', 'version', 'config-file='
    ]

    # requested action: "start" runs in the background, "stop" terminates
    # the instance that is already running in the background
    operation = None

    try:
        optlist, _ = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError as e:
        # unknown option or missing argument: explain instead of a traceback
        print(e)
        printHelp()
        sys.exit(2)

    # walk through the parsed (key, value) pairs
    for key, value in optlist:
        if key in ('-v', '--version'):
            print("version: airbnb fetcher 0.1")
            sys.exit(0)
        if key in ('-h', '--help'):
            printHelp()
            sys.exit(0)
        if key in ('-c', '--config-file'):
            configPath = value
        if key in ('-l', '--log-file'):
            logPath = value
        if key in ('-s', '--storage-file'):
            storagePath = value
        if key == '-d':
            operation = value

    # configure the logging module
    Log.config(logPath, echo=False)

    if operation == "start":
        daemon_start(pidFilePath)
    elif operation == "stop":
        daemon_stop(pidFilePath)
        sys.exit(0)

    if not configPath:
        Log.e(MAIN_TAG, "config path not provided")
        sys.exit(0)

    config = parseConfigFile(configPath)
    if not config:
        sys.exit(0)

    config.localStoragePath = storagePath
    service = SearchService(config)
    service.start()
def saveReservationInfo(self, roomId):
    """
    Record that the given room is available today.

    Inserts a ``(room_id, date)`` row into ``reservation_info`` using the
    date string returned by ``getDateStr()``.

    :param roomId: id of the room to mark
    :return: True when a row was inserted, False otherwise (failures are
        logged, not raised)
    """
    inserted = False
    db_cursor = self._conn.cursor()
    try:
        row = (roomId, getDateStr())
        db_cursor.execute(
            "INSERT INTO reservation_info (room_id, date) VALUES(?, ?)", row)
        inserted = db_cursor.rowcount > 0
    except BaseException as e:
        Log.w(StorageService._TAG, "saveReservationInfo() failed", e)
    finally:
        # release the cursor and persist whatever was written
        db_cursor.close()
        self._conn.commit()
    return inserted
def isAvailable(self, roomId, dateStr):
    """
    Query whether a room specified by roomId is available on the given date.

    :param roomId: id of the room to look up
    :param dateStr: like 2019-05-30, be sure to use
        '{:%Y-%m-%d}'.format(datetime)
    :return: True for available that day, False otherwise (also False when
        the lookup fails; the failure is logged)
    """
    cursor = self._conn.cursor()
    ret = False
    try:
        # only existence matters, so ask for at most one constant row
        cursor.execute(
            "SELECT 1 FROM reservation_info WHERE room_id=? AND date=? LIMIT 1",
            (roomId, dateStr))
        ret = cursor.fetchone() is not None
    except Exception as e:
        # Exception (not BaseException) so SystemExit/KeyboardInterrupt still propagate
        Log.w(StorageService._TAG, "isAvailable() failed", e)
    finally:
        cursor.close()
        self._conn.commit()
    return ret
def prepareDB(self, path):
    """
    Open the sqlite database at ``path`` and make sure both tables exist.

    Creates ``room_info`` and ``reservation_info`` when they are missing.

    :param path: path of the sqlite database file
    :return: the open connection; it is returned even when creating the
        tables failed (the failure is logged)
    """
    conn = sqlite3.connect(path)
    cursor = conn.cursor()
    try:
        # table for room
        cursor.execute(
            "CREATE TABLE IF NOT EXISTS room_info ("
            "_id INTEGER PRIMARY KEY AUTOINCREMENT, "
            "room_id INTEGER UNIQUE, "
            "person_capacity INTEGER, "
            "city VARCHAR(20), "
            "beds INTEGER, "
            "localized_neighborhood VARCHAR(50), "
            "price INTEGER, "
            "pic VARCHAR(200), "
            "update_time INTEGER, "
            "query_str VARCHAR(100))")
        # table for reservation
        cursor.execute(
            "CREATE TABLE IF NOT EXISTS reservation_info ("
            "_id INTEGER PRIMARY KEY AUTOINCREMENT, "
            "room_id INTEGER, "
            "date VARCHAR(20))")
    except Exception as e:
        # Exception (not BaseException) so SystemExit/KeyboardInterrupt still propagate
        Log.e(StorageService._TAG, "prepareDB() failed", e)
    finally:
        cursor.close()
        conn.commit()
    return conn
def start(self):
    """
    Run the service loop forever.

    The clock is polled every 10 seconds. When the current hour and minute
    match the configured start time, one query is executed for every entry
    returned by ``prepareDate()`` and the collected analysis is reported.

    :return: never returns normally
    """
    print("airbnb service start")
    while True:
        now = datetime.datetime.now()
        if now.hour == self._config.startHour and now.minute == self._config.startMinute:
            dates = self.prepareDate()
            self.analyzeCollection.clear()
            self.forExcel.clear()
            try:
                for item in dates:
                    self.doQuery(item[0], item[1], item[2])
            except Exception as e:
                # Exception (not BaseException): a SystemExit raised by a
                # signal handler must be able to stop the loop
                Log.e(SearchService.TAG, "doQuery() failed, ", e)
            if len(self.analyzeCollection) > 0:
                self.reportForAnalyzeResult(self.analyzeCollection, self.forExcel)
            else:
                Log.w(SearchService.TAG, "no analyze result generated")
            # sleeping for more than a minute guarantees the hour/minute
            # check above no longer matches the minute that just fired
            time.sleep(70)
        else:
            time.sleep(10)  # in second
def write_pid_file(pid_file, pid):
    """
    Lock the pid file exclusively and write ``pid`` into it.

    The file is created when missing (readable and writable by the owner
    only) and marked close-on-exec. When the lock cannot be taken, the
    content of the file is logged as the pid of the running instance.
    On success the descriptor is deliberately not closed here
    (presumably so the lock stays held while the process lives).

    :param pid_file: path of the pid file
    :param pid: the process id to record
    :return: 0 on success, -1 when the file cannot be opened or is locked
    """
    import fcntl
    import stat

    open_flags = os.O_RDWR | os.O_CREAT
    owner_read_write = stat.S_IRUSR | stat.S_IWUSR
    try:
        fd = os.open(pid_file, open_flags, owner_read_write)
    except OSError as e:
        Log.w(MAIN_TAG, "exception for write_pid_file()", e)
        return -1

    # mark the descriptor close-on-exec
    fd_flags = fcntl.fcntl(fd, fcntl.F_GETFD)
    assert fd_flags != -1
    set_result = fcntl.fcntl(fd, fcntl.F_SETFD, fd_flags | fcntl.FD_CLOEXEC)
    assert set_result != -1

    # There is no platform independent way to implement fcntl(fd, F_SETLK, &fl)
    # via fcntl.fcntl. So use lockf instead
    try:
        fcntl.lockf(fd, fcntl.LOCK_EX | fcntl.LOCK_NB, 0, 0, os.SEEK_SET)
    except IOError:
        # somebody else holds the lock: report the pid stored in the file
        holder = os.read(fd, 32)
        message = 'already started'
        if holder:
            message = 'already started at pid %s' % Utils.to_str(holder)
        Log.e(MAIN_TAG, message)
        os.close(fd)
        return -1

    os.ftruncate(fd, 0)
    os.write(fd, Utils.to_bytes(str(pid)))
    return 0
def doQuery(self, checkin_date, checkout_dates, time_title):
    """
    The entry for building and executing a home api query.

    For every configured city group, rooms are fetched for each city,
    check-out date and guest count (1 and 2 adults). When rooms were found
    they are stored and analyzed; otherwise the group is retried, up to 3
    attempts in total, waiting ``_queryIntervalForFailInSec`` after each
    failed attempt.

    :param checkin_date: the date when u are going to check in.
    :param checkout_dates: multiple checkout dates for enumerating the home
        info as completely as possible
    :param time_title: passed through to ``performAnalyze``
    :return: None; results are appended to ``self.analyzeCollection`` and
        ``self.forExcel``
    """
    Log.d(SearchService.TAG, "===> querying on " + getDateStr())
    storageService = StorageService(self._config.localStoragePath)
    try:
        for cityName, cityList in self._config.cityList.items():
            retry_time = 0
            while retry_time < 3:
                homeInfoCollection = dict()
                for city in cityList:
                    for checkout_date in checkout_dates:
                        for adults in [1, 2]:
                            self.tryGetHomeInfo(checkin_date, checkout_date,
                                                homeInfoCollection, adults, city)
                if len(homeInfoCollection) > 0:
                    roomInfos = list(homeInfoCollection.values())
                    storageService.saveOrUpdateRoomBatch(roomInfos)
                    analyze, excelRet = self.performAnalyze(
                        roomInfos, cityName, storageService, time_title)
                    self.analyzeCollection.append(analyze)
                    self.forExcel.append(excelRet)
                    break
                retry_time += 1
                Log.w(SearchService.TAG,
                      "no data for query={0}, will retry for {1}th time".format(cityName, retry_time))
                time.sleep(self._queryIntervalForFailInSec)  # take your time to do it
    except Exception as e:
        # Exception (not BaseException): SystemExit / KeyboardInterrupt must
        # still be able to stop the process while a query is running
        Log.w(SearchService.TAG, "doQuery() failed, ",
              str(e) + "\r\nexception detail:" + traceback.format_exc())
    finally:
        storageService.close()
def saveOrUpdateRoomBatch(self, roomInfos):
    """
    Save or update the information of every room in the batch.

    A room whose ``room_id`` is already stored is updated, any other room
    is inserted. ``update_time`` is set from ``getNowTimeStamp()``.

    :param roomInfos: a list of RoomInfo
    :return: True when every statement affected at least one row (also True
        for an empty list), False otherwise or when an error occurred
    """
    cursor = self._conn.cursor()
    success = True
    try:
        for roomInfo in roomInfos:
            cursor.execute("SELECT room_id FROM room_info WHERE room_id=?",
                           (roomInfo.roomId, ))
            if cursor.fetchone() is not None:
                # exist
                # NOTE(review): price is converted with int() here but not in
                # the INSERT below - confirm which form is intended
                cursor.execute(
                    "UPDATE room_info SET person_capacity=?, city=?, beds=?, "
                    "localized_neighborhood=?, price=?, pic=?, update_time=?, "
                    "query_str=? WHERE room_id=?",
                    (roomInfo.personCapacity, roomInfo.city, roomInfo.beds,
                     roomInfo.neighbourhood, int(roomInfo.price), roomInfo.pic,
                     getNowTimeStamp(), roomInfo.query, roomInfo.roomId))
            else:
                # new
                cursor.execute(
                    "INSERT INTO room_info (room_id, person_capacity, city, beds, "
                    "localized_neighborhood, price, pic, update_time, query_str) "
                    "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                    (roomInfo.roomId, roomInfo.personCapacity, roomInfo.city,
                     roomInfo.beds, roomInfo.neighbourhood, roomInfo.price,
                     roomInfo.pic, getNowTimeStamp(), roomInfo.query))
            success = cursor.rowcount > 0 and success
    except Exception as e:
        # Exception (not BaseException) so SystemExit/KeyboardInterrupt still propagate
        success = False
        Log.w(StorageService._TAG, "saveOrUpdateRoomBatch() failed", e)
    finally:
        cursor.close()
        self._conn.commit()
    return success
def getRoomById(self, roomId):
    """
    Query the full information stored for a given roomId.

    :param roomId: id of the room to look up
    :return: a RoomInfo with the given roomId if found, None if not (also
        None when the lookup fails; the failure is logged)
    """
    cursor = self._conn.cursor()
    ret = None
    try:
        cursor.execute(
            "SELECT room_id, person_capacity, city, beds, localized_neighborhood, "
            "price, pic, query_str FROM room_info WHERE room_id=?",
            (roomId, ))
        # only the first match is used, so there is no need to fetch them all
        row = cursor.fetchone()
        if row is not None:
            # the eight selected columns are passed positionally, in SELECT order
            ret = RoomInfo(*row)
    except Exception as e:
        # Exception (not BaseException) so SystemExit/KeyboardInterrupt still propagate
        Log.w(StorageService._TAG, "getRoomById() failed", e)
    finally:
        cursor.close()
        self._conn.commit()
    return ret
def HttpGetRequest(host, url, decode='utf-8'):
    """
    Perform an HTTP/HTTPS GET request.

    :param host: the host to connect to; a leading ``https://`` selects an
        HTTPS connection, a leading ``http://`` or no scheme selects HTTP
    :param url: the request target passed to the connection
    :param decode: the encoding used to decode the response body
    :return: the stripped, decoded response body; None when the status is
        not OK or when any error occurred (the error is logged)
    """
    try:
        connection_factory = httplib.HTTPConnection
        if host.startswith("https://"):
            connection_factory = httplib.HTTPSConnection
            host = host[8:]
        elif host.startswith("http://"):
            host = host[7:]

        with closing(connection_factory(host)) as conn:
            conn.request(method="GET", url=url)
            response = conn.getresponse()
            if response.status == httplib.OK:
                body = response.read()
                # undecodable bytes are dropped rather than raising
                return body.decode(decode, 'ignore').strip()
            return None
    except Exception as e:
        Log.d(TAG, e)
    return None
def retrieveHomeData(self, query, originReturn, homeInfoCollection):
    '''
    Collect the rooms contained in one API response.

    Every item of every "DoraExploreV3ListingsSection" section is parsed
    with RoomInfo.parseFromDict and stored in homeInfoCollection under its
    roomId.

    :param query: the query string the response belongs to
    :param originReturn: the decoded API response
    :param homeInfoCollection: mapping that receives the parsed rooms
    :return: the pagination metadata of the response, which is like
        "paginationMetadata":{
            "__typename":"DoraExploreV3PaginationMetadata",
            "hasNextPage":true,
            "itemsOffset":40,
            "sectionOffset":3,
            "hasPreviousPage":true,
            "previousPageSectionOffset":0,
            "previousPageItemsOffset":0,
            "searchSessionId":"d91045aa-0d95-49ba-ad44-0d7bcf8b2813",
            "pageLimit":20,
            "totalCount":"124"
        }
    '''
    Log.d(SearchService.TAG, "analyzing " + query)
    explore = originReturn["data"]["dora"]["exploreV3"]
    sections = explore["filters"]["sections"]
    pagination = explore["metadata"]["paginationMetadata"]

    has_sections = bool(sections) and len(sections) > 0
    if not has_sections:
        Log.w(SearchService.TAG, "explore_tabs is empty")
        return pagination

    for section in sections:
        # only listing sections carry rooms
        if not section or section["__typename"] != "DoraExploreV3ListingsSection":
            continue
        listings = section["items"]
        Log.d(SearchService.TAG, str(len(listings)) + " rooms for " + query)
        for listing in listings:
            parsed = RoomInfo.parseFromDict(listing, query)
            if parsed:
                homeInfoCollection[parsed.roomId] = parsed
    return pagination
def parseConfigFile(path):
    '''
    Read the json config file and build a Config from it.

    Example for a valid config file, should be in json format
    {
        "start_hour":14,
        "start_minute":0,
        "counting_days":30,
        "room_limit":0,
        "smtp_host":"smtp.exmail.qq.com",
        "smtp_port":465,
        "send_type":"ssl",
        "email_receiver":"*****@*****.**",
        "email_sender":"*****@*****.**",
        "sender_passwd":"***",
        "query":[
            "上海,徐汇区,中国",
            "上海,虹口区,中国",
            "上海,浦东新区,中国"
        ]
    }

    Every "query" entry is split on "," and grouped under its first field;
    entries with fewer than two fields are skipped.

    :param path: path of the config file (read as utf-8)
    :return: the Config, or None when the file cannot be read, is not valid
        json or has no "query" list (the reason is logged)
    '''
    try:
        # the context manager makes sure the file handle is released
        with open(path, "r", encoding='utf-8') as f:
            jsonStr = f.read()
        configDict = json.loads(jsonStr.replace("\\n", ""))
    except OSError as e:
        Log.w(MAIN_TAG, "parseConfigFile() fail, ", e)
        return None
    except json.decoder.JSONDecodeError as e:
        Log.w(MAIN_TAG, "parseConfigFile() fail, ", e)
        return None

    hour = configDict.get("start_hour")
    minute = configDict.get("start_minute")
    counting_days = configDict.get("counting_days")
    room_limit = configDict.get("room_limit")
    email_receiver = configDict.get("email_receiver")
    email_sender = configDict.get("email_sender")
    sender_passwd = configDict.get("sender_passwd")
    query = configDict.get("query")
    smtp_host = configDict.get("smtp_host")
    smtp_port = configDict.get("smtp_port")
    send_type = configDict.get("send_type")

    if query is None:
        # without a query list there is nothing to iterate over
        Log.w(MAIN_TAG, "parseConfigFile() fail, ", "\"query\" is missing")
        return None

    # group the split query entries by their first field
    cities = dict()
    for item in query:
        splited = item.split(",")
        if len(splited) < 2:
            continue
        cities.setdefault(splited[0], list()).append(splited)

    return Config(hour,
                  minute,
                  cities,
                  email_receiver,
                  email_sender,
                  sender_passwd,
                  room_limit,
                  countingDays=counting_days,
                  smtpHost=smtp_host,
                  smtpPort=smtp_port,
                  sendType=send_type)
def daemon_stop(pid_file):
    """
    Stop the background instance whose pid is stored in ``pid_file``.

    Sends SIGTERM to the recorded pid, waits up to 10 seconds for the
    process to disappear and then removes the pid file. Returns normally
    when no instance is running (missing or empty pid file, or the process
    no longer exists); exits the interpreter with status 1 on any other
    failure.

    :param pid_file: path of the pid file written when the daemon started
    :return: None
    """
    import errno
    try:
        with open(pid_file) as f:
            buf = f.read()
    except IOError as e:
        Log.e(MAIN_TAG, "daemon_stop() fail", e)
        if e.errno == errno.ENOENT:
            # always exit 0 if we are sure daemon is not running
            Log.e(MAIN_TAG, 'not running')
            return
        sys.exit(1)

    pid_text = Utils.to_str(buf).strip()
    if not pid_text:
        # an empty pid file means nothing was recorded; do not try to parse it
        Log.e(MAIN_TAG, 'not running')
        return
    try:
        pid = int(pid_text)
    except ValueError:
        Log.e(MAIN_TAG, 'invalid pid file content: ' + pid_text)
        sys.exit(1)

    if pid <= 0:
        # os.kill() with pid <= 0 addresses process groups, never signal those
        Log.e(MAIN_TAG, 'pid is not positive: ' + str(pid))
        sys.exit(1)

    try:
        os.kill(pid, signal.SIGTERM)
    except OSError as e:
        if e.errno == errno.ESRCH:
            Log.e(MAIN_TAG, 'not running')
            # always exit 0 if we are sure daemon is not running
            return
        Log.e(MAIN_TAG, "daemon_stop() fail", e)
        sys.exit(1)

    # sleep for maximum 10s
    for _ in range(200):
        try:
            # query for the pid
            os.kill(pid, 0)
        except OSError as e:
            if e.errno == errno.ESRCH:
                break
        time.sleep(0.05)
    else:
        Log.e(MAIN_TAG, 'timed out when stopping pid ' + str(pid))
        sys.exit(1)
    Log.d(MAIN_TAG, 'stopped')
    os.unlink(pid_file)
#!/usr/bin/env python # -*- coding: utf-8 -*- # 测试日志辅助类 import sys sys.path.append("../") from LogHelper import Log TAG = "Test" Log.d(TAG, "Hello") Log.e(TAG, "World!") Log.i(TAG, "Hehe") Log.w(TAG, "XXXX")