def Catch(self):
    """Fetch the current list page of the active cfg and advance its pager.

    Reads the cfg row named by ``config.runMode['name']``, downloads the page
    stored in ``next_paper`` (or page 1 built from ``org_pager`` when that is
    empty), hands every node matched by the ``CatchList`` xpath to
    ``db.listt().check_echo`` and finally records which page to fetch next.

    Side effects: writes ``cfg_id`` / ``coding`` (and, when paging is
    finished, ``catch_select``) into ``config.runing`` and updates the cfg
    row through ``db.cfg().update_nextpage``.
    """
    # A parenthesised single-argument print emits the same text on Python 2
    # and, unlike the bare print statement, also parses on Python 3.
    print('listt.Catch')

    obj_cfg = db.cfg().read_one(config.runMode['name'])
    if not obj_cfg:
        # Everything starts from cfg; without a cfg record nothing can run.
        log.Log.init().info('cfg nothing')
        return

    config.runing['cfg_id'] = obj_cfg['id']
    config.runing['coding'] = obj_cfg['coding']

    page_url = obj_cfg['next_paper']
    page_no = obj_cfg['pager']
    if not page_url:
        # No stored "next" URL yet: start from the first page.
        page_url = obj_cfg['org_pager'].replace("{pager}", "1")

    resp = Http.Transfer().get(page_url, encoding=config.runing['coding'])
    if resp.abnormity:
        # Previously the failure was dropped without a trace; report it the
        # same way the chapter crawler does. The cfg row is left untouched.
        log.Log.init().warn(resp.abnormity_reason)
        return

    page_doc = etree.HTML(resp.content)
    for link in page_doc.xpath(config.runMode['filter']['CatchList']):
        db.listt().check_echo(link, obj_cfg)

    has_next_page = (
        config.runMode['filter']['EndPager'] in resp.content
        and obj_cfg['next_count'] > 0
    )
    if has_next_page:
        log.Log.init().info('the next page exists')
        page_no = page_no + 1
        page_url = obj_cfg['org_pager'].replace("{pager}", str(page_no))
        db.cfg().update_nextpage(obj_cfg['id'], page_no, page_url)
    else:
        # Paging is over: reset the pager and remember the last page visited.
        db.cfg().update_nextpage(obj_cfg['id'], 1, '', last_pager=page_url)
        # NOTE(review): presumably selects the next crawl stage - confirm
        # against the dispatcher that reads 'catch_select'.
        config.runing['catch_select'] = 1
def Catch(self):
    """Download one pending chapter and store its parsed content.

    Takes the cfg from ``config.runStatic['obj_cfg']`` and asks
    ``db.chapter().read_one`` for a chapter of that cfg. When none is
    returned the cfg is moved to state 2 and ``catch_select`` is set to 3.
    Otherwise the chapter URL is fetched; on success the response body is
    run through ``self.__analysis_xpath`` and saved with ``update_state_1``.

    Raises:
        Exception: when the HTTP error is not accepted by
            ``self.__ignore_http_err``, or when the ignore counter in
            ``config.runing['ignore']['http_err']`` drops below zero.
    """
    log.Log.init().info('files.Catch')
    cfg = config.runStatic['obj_cfg']
    chapter = db.chapter().read_one(cfg['id'])

    if not chapter:
        db.cfg().update_state(cfg['id'], state=2)
        log.Log.init().info('these files is nothing, will be next')
        config.runing['catch_select'] = 3
        return

    log.Log.init().info(chapter['url'])
    resp = Http.Transfer().get(chapter['url'], encoding=cfg['coding'])

    if not resp.abnormity:
        parsed = self.__analysis_xpath(resp.content)
        db.chapter().update_state_1(chapter['id'], parsed)
        return

    if not self.__ignore_http_err(resp.code):
        raise Exception("http error, code :" + str(resp.code))

    # Ignorable error: record the HTTP code on the chapter, then spend one
    # unit of the remaining "ignore" allowance.
    db.chapter().update_state(chapter['id'], resp.code)
    log.Log.init().warn('ignore http error, this code:' + str(resp.code) +
                        ', text:' + resp.abnormity_reason)
    ignore_budget = config.runing['ignore']
    ignore_budget['http_err'] -= 1
    if ignore_budget['http_err'] < 0:
        raise Exception("http error ignore times has been used up")
def Catch(self):
    """Fetch one pending book page and register its chapter links.

    Takes the cfg from ``config.runStatic['obj_cfg']`` and asks
    ``db.book().read_one`` for a book of that cfg. When none is returned,
    ``catch_select`` is set to 2. Otherwise the book URL is downloaded, the
    first ``introduction`` xpath match (if any) is stored on the book, every
    ``list`` xpath match is parsed with ``self.__analysis_xpath`` and
    inserted through ``db.chapter().insdb_url``, and the book is finally
    marked with ``updata_state_1``. A failed download only logs a warning.
    """
    log.Log.init().info('chapter.Catch')
    cfg = config.runStatic['obj_cfg']
    book = db.book().read_one(cfg['id'])

    if not book:
        log.Log.init().info('these books is nothing, will be next')
        config.runing['catch_select'] = 2
        return

    log.Log.init().info(book['url'])
    resp = Http.Transfer().get(book['url'], encoding=cfg['coding'])
    if resp.abnormity:
        log.Log.init().warn(resp.abnormity_reason)
        return

    page_doc = etree.HTML(resp.content)
    chapter_filter = config.runMode['filter']['CatchChapterList']

    intro_matches = page_doc.xpath(chapter_filter['introduction'])
    if intro_matches:
        db.book().updata_introduction(book['id'], intro_matches[0])

    for chapter_node in page_doc.xpath(chapter_filter['list']):
        chapter_row = self.__analysis_xpath(etree.tostring(chapter_node))
        chapter_row['cfg_id'] = cfg['id']
        chapter_row['book_id'] = str(book['id'])
        # The parsed url is appended to the book url.
        chapter_row['url'] = book['url'] + chapter_row['url']
        db.chapter().insdb_url(chapter_row)

    db.book().updata_state_1(book['id'])
    log.Log.init().info(
        'this book\'s chapters is done, will be next book')
def __init__(self, settings):
    """Set up the bot's initial state from ``settings``.

    Args:
        settings: configuration object; its ``Bot`` attribute supplies the
            bot configuration, including ``roomname``.
    """
    self._ws = None
    self.state = BotState(BotState.INITIALIZED)
    self.start_time = time.time()
    self.api = Http()
    self.cm = CogManager()
    self.settings = settings
    self.botconfig = self.settings.Bot
    # A bare ``self.ul: UserList`` annotation assigns nothing, so the
    # attribute did not exist until something else set it. Create the
    # list here so ``self.ul`` is always usable after construction.
    self.ul: UserList = UserList()
    self.room = self.botconfig.roomname
def Catch(self):
    """Fetch the URL of one list entry of the active cfg and print its body.

    Looks up an entry with ``listt().read_one(config.runing['cfg_id'])``,
    downloads its ``url`` using ``config.runing['coding']`` as encoding and
    prints the response content. When no entry is returned only an info
    message is logged.
    """
    # Parenthesised single-argument prints emit the same text on Python 2
    # and, unlike the bare print statement, also parse on Python 3.
    print('updataa.Catch')

    obj_list = listt().read_one(config.runing['cfg_id'])
    if not obj_list:
        log.Log.init().info('list in nothin')
        return

    log.Log.init().info('catch in:' + obj_list['url'])
    resp = Http.Transfer().get(obj_list['url'],
                               encoding=config.runing['coding'])
    if resp.abnormity:
        # Previously a failed download was dropped without a trace; report
        # it the same way the chapter crawler does.
        log.Log.init().warn(resp.abnormity_reason)
        return

    print(resp.content)
def list_repositories(self):
    """Return the sorted SSH clone URLs of the organization's repositories.

    Starts at ``<api_url>/repositories/<organization>`` and keeps following
    the ``next`` link of each JSON page until a page has none. From every
    entry in ``values`` the ``href`` of each clone link named ``ssh`` is
    collected.

    Returns:
        list: the collected clone URLs, sorted.
    """
    page_url = '%s/repositories/%s' % (self.api_url, self.organization)
    info('[bitbucket] listing repositories')

    ssh_urls = []
    while True:
        # A fresh client is built for every page, as before.
        client = Http(self.user, self.password)
        page = json.loads(client.get(page_url))

        ssh_urls.extend(
            clone_link['href']
            for repo in page['values']
            for clone_link in repo['links']['clone']
            if clone_link['name'] == 'ssh'
        )

        if 'next' not in page:
            break
        page_url = page['next']

    return sorted(ssh_urls)
def Catch(self):
    """Crawl one catalog page, register its books and advance the pager.

    Takes the cfg from ``config.runStatic['obj_cfg']`` and asks
    ``db.catalog().read_one`` for a catalog of that cfg. The page stored in
    ``next_paper`` (or page 1 built from ``org_pager`` when that is empty)
    is downloaded, every node matched by the ``CatchBook``/``list`` xpath is
    parsed with ``self.__analysis_xpath`` and passed to
    ``db.book().check_echo``; each truthy result decrements
    ``config.runing['catalog']['next_count']``. Finally the catalog row is
    updated with the page to fetch next, or reset when paging is over.
    """
    log.Log.init().info('book.Catch')
    obj_cfg = config.runStatic['obj_cfg']
    obj_catalog = db.catalog().read_one(obj_cfg['id'])

    if not obj_catalog:
        # Everything starts from cfg; without its configuration nothing
        # below can run.
        # NOTE(review): the message says 'cfg' although the missing record
        # is the catalog; text kept unchanged for log compatibility.
        log.Log.init().info('cfg nothing')
        return

    if not config.runing['catalog']:
        # First visit in this run: seed the counter from the catalog row.
        config.runing['catalog']['next_count'] = obj_catalog['next_count']

    page_url = obj_catalog['next_paper']
    page_no = obj_catalog['pager']
    if not page_url:
        # No stored "next" URL yet: start from the first page.
        page_url = obj_catalog['org_pager'].replace("{pager}", "1")

    resp = Http.Transfer().get(page_url, encoding=obj_cfg['coding'])
    if resp.abnormity:
        # Previously the failure was dropped without a trace; report it the
        # same way the chapter crawler does. The catalog row is untouched.
        log.Log.init().warn(resp.abnormity_reason)
        return

    page_doc = etree.HTML(resp.content)
    book_nodes = page_doc.xpath(
        config.runMode['filter']['CatchBook']['list'])

    # The loop variable used to reuse the name of the parsed document;
    # a distinct name keeps the document reference intact.
    for book_node in book_nodes:
        book_info = self.__analysis_xpath(etree.tostring(book_node))
        is_echo = db.book().check_echo(book_info, obj_cfg['id'],
                                       obj_catalog['id'])
        if is_echo:
            config.runing['catalog']['next_count'] -= 1

    has_next_page = (
        config.runMode['filter']['EndPager'] in resp.content
        and config.runing['catalog']['next_count'] > 0
        and len(book_nodes) > 0
    )
    if has_next_page:
        log.Log.init().info(
            'the next page exists. current next_count:' +
            str(config.runing['catalog']['next_count']))
        page_no = page_no + 1
        page_url = obj_catalog['org_pager'].replace("{pager}", str(page_no))
        db.catalog().update_nextpage(obj_catalog['id'], page_no, page_url)
    else:
        # Paging is over: reset the pager and remember the last page visited.
        db.catalog().update_nextpage(obj_catalog['id'], 1, '',
                                     last_pager=page_url)
        # NOTE(review): presumably selects the next crawl stage - confirm
        # against the dispatcher that reads 'catch_select'.
        config.runing['catch_select'] = 1
def __init__(self, settings):
    """Set up the bot's initial state, collaborators and logger.

    Args:
        settings: configuration object; its ``Bot`` attribute supplies the
            bot configuration (``roomname`` and ``debug`` are read here).
    """
    self._ws = None
    self.state = BotState(BotState.INITIALIZED)
    self.start_time = time.time()

    # Collaborators, created in the same order as before.
    self.api = Http()
    self.cm = CogManager()

    self.settings = settings
    self.botconfig = self.settings.Bot
    self.ul = UserList()
    self.room = self.botconfig.roomname

    # NOTE(review): 10 / 19 are handed to QuantumLogger as its second
    # argument; presumably logging levels (10 matches logging.DEBUG) -
    # confirm against QuantumLogger.
    log_level = 10 if self.settings.Bot.debug else 19
    self.log = QuantumLogger('QuantumJump', log_level)