コード例 #1
0
ファイル: listt.py プロジェクト: cloudiy/fetch2mongodb
 def Catch(self):
     """Fetch the current list page for the active cfg and record its links.

     Reads the cfg record selected by ``config.runMode['name']``, publishes
     its ``id`` and ``coding`` into ``config.runing``, and requests the page
     stored in ``next_paper`` (falling back to ``org_pager`` with ``{pager}``
     replaced by ``1`` when ``next_paper`` is empty).  Every node matched by
     the ``CatchList`` XPath filter is handed to ``db.listt().check_echo``.

     Afterwards the stored pager is either advanced (when the ``EndPager``
     marker occurs in the response and ``next_count`` is positive) or reset,
     in which case ``config.runing['catch_select']`` is set to 1.

     Returns None in every case.
     """
     # Parenthesised form prints the same text on Python 2 and also parses
     # on Python 3, where the bare print statement is a SyntaxError.
     print('listt.Catch')
     obj_cfg = db.cfg().read_one(config.runMode['name'])
     if not obj_cfg:
         # Everything starts from cfg; without a cfg record nothing can run.
         log.Log.init().info('cfg nothing')
         return

     config.runing['cfg_id'] = obj_cfg['id']
     config.runing['coding'] = obj_cfg['coding']
     sNextpaper = obj_cfg['next_paper']
     iPager = obj_cfg['pager']
     if not sNextpaper:
         # No stored next page yet: start from page 1 of the URL template.
         sNextpaper = obj_cfg['org_pager'].replace("{pager}", "1")

     resp = Http.Transfer().get(sNextpaper, encoding=config.runing['coding'])
     if resp.abnormity:
         # Previously a failed fetch was dropped without any trace; report it
         # the same way the chapter catcher does.
         log.Log.init().warn(resp.abnormity_reason)
         return

     obj_html = etree.HTML(resp.content)
     for link in obj_html.xpath(config.runMode['filter']['CatchList']):
         db.listt().check_echo(link, obj_cfg)

     has_next_page = (config.runMode['filter']['EndPager'] in resp.content
                      and obj_cfg['next_count'] > 0)
     if has_next_page:
         log.Log.init().info('the next page exists')
         iPager = iPager + 1
         sNextpaper = obj_cfg['org_pager'].replace("{pager}", str(iPager))
         db.cfg().update_nextpage(obj_cfg['id'], iPager, sNextpaper)
     else:
         # Reset to page 1 and remember the page that was just processed.
         db.cfg().update_nextpage(obj_cfg['id'], 1, '', last_pager=sNextpaper)
         config.runing['catch_select'] = 1
コード例 #2
0
ファイル: files.py プロジェクト: cloudiy/fetch2mongodb
 def Catch(self):
     """Fetch one chapter page and store the parsed result.

     Looks up a chapter record for the cfg held in
     ``config.runStatic['obj_cfg']``.  When none is returned the cfg is
     moved to state 2 and ``config.runing['catch_select']`` is set to 3.

     Otherwise the chapter URL is downloaded.  On success the content is
     parsed with ``__analysis_xpath`` and saved via ``update_state_1``.
     On failure the HTTP code is either tolerated (recorded on the chapter
     and counted against ``config.runing['ignore']['http_err']``) or raised.

     Raises:
         Exception: the HTTP error is not ignorable, or the ignore budget
             has dropped below zero.
     """
     log.Log.init().info('files.Catch')
     obj_cfg = config.runStatic['obj_cfg']
     obj_chapter = db.chapter().read_one(obj_cfg['id'])

     if not obj_chapter:
         # Nothing to fetch for this cfg: flag it and switch the selector.
         db.cfg().update_state(obj_cfg['id'], state=2)
         log.Log.init().info('these files is nothing, will be next')
         config.runing['catch_select'] = 3
         return

     log.Log.init().info(obj_chapter['url'])
     resp = Http.Transfer().get(obj_chapter['url'], encoding=obj_cfg['coding'])

     if not resp.abnormity:
         parsed_fields = self.__analysis_xpath(resp.content)
         db.chapter().update_state_1(obj_chapter['id'], parsed_fields)
         return

     if not self.__ignore_http_err(resp.code):
         raise Exception("http error, code :" + str(resp.code))

     # Tolerated error: record the code on the chapter and spend one unit
     # of the ignore budget.
     db.chapter().update_state(obj_chapter['id'], resp.code)
     log.Log.init().warn(
         'ignore http error, this code:' + str(resp.code)
         + ', text:' + resp.abnormity_reason)
     remaining = config.runing['ignore']['http_err'] - 1
     config.runing['ignore']['http_err'] = remaining
     if remaining < 0:
         raise Exception("http error ignore times has been used up")
コード例 #3
0
 def Catch(self):
     """Fetch one book page and register its chapter links.

     Looks up a book record for the cfg held in
     ``config.runStatic['obj_cfg']``.  When none is returned,
     ``config.runing['catch_select']`` is set to 2.

     Otherwise the book URL is downloaded.  The first node matched by the
     ``introduction`` filter (if any) is stored as the book introduction,
     each node matched by the ``list`` filter is parsed with
     ``__analysis_xpath`` and inserted as a chapter, and the book is then
     marked through ``updata_state_1``.  A failed download is only logged.
     """
     log.Log.init().info('chapter.Catch')
     obj_cfg = config.runStatic['obj_cfg']
     obj_book = db.book().read_one(obj_cfg['id'])

     if not obj_book:
         log.Log.init().info('these books is nothing, will be next')
         config.runing['catch_select'] = 2
         return

     log.Log.init().info(obj_book['url'])
     resp = Http.Transfer().get(obj_book['url'], encoding=obj_cfg['coding'])
     if resp.abnormity:
         log.Log.init().warn(resp.abnormity_reason)
         return

     obj_html = etree.HTML(resp.content)
     chapter_filter = config.runMode['filter']['CatchChapterList']

     intro_nodes = obj_html.xpath(chapter_filter['introduction'])
     if intro_nodes:
         db.book().updata_introduction(obj_book['id'], intro_nodes[0])

     for chapter_node in obj_html.xpath(chapter_filter['list']):
         dic_db = self.__analysis_xpath(etree.tostring(chapter_node))
         dic_db['cfg_id'] = obj_cfg['id']
         dic_db['book_id'] = str(obj_book['id'])
         # The parsed url is appended directly to the book url.
         dic_db['url'] = obj_book['url'] + dic_db['url']
         db.chapter().insdb_url(dic_db)

     db.book().updata_state_1(obj_book['id'])
     log.Log.init().info("this book's chapters is done, will be next book")
コード例 #4
0
ファイル: blumpkin.py プロジェクト: genba2/QuantumJump
 def __init__(self, settings):
     """Set up the bot's initial state from *settings*.

     Args:
         settings: configuration object; its ``Bot`` attribute is kept as
             ``self.botconfig`` and supplies ``roomname``.
     """
     # No websocket yet.
     self._ws = None
     self.state = BotState(BotState.INITIALIZED)
     # Wall-clock timestamp taken at construction time.
     self.start_time = time.time()
     self.api = Http()
     self.cm = CogManager()
     self.settings = settings
     self.botconfig = self.settings.Bot
     # Annotation only: no value is bound here, so reading ``self.ul``
     # raises AttributeError until it is assigned somewhere else.
     # NOTE(review): presumably populated later (e.g. when a user list
     # arrives) -- confirm against the rest of the class.
     self.ul: UserList
     self.room = self.botconfig.roomname
コード例 #5
0
 def Catch(self):
     """Fetch the page of one list record and print its content.

     Reads a list record for ``config.runing['cfg_id']``.  When one is
     returned, its ``url`` is downloaded using ``config.runing['coding']``
     and the response content is printed; a failed download is logged as a
     warning.  When no record is returned, only an info line is logged.

     Returns None in every case.
     """
     # Parenthesised print works identically on Python 2 and also parses on
     # Python 3, where the bare print statement is a SyntaxError.
     print('updataa.Catch')
     obj_list = listt().read_one(config.runing['cfg_id'])
     if not obj_list:
         log.Log.init().info('list in nothin')
         return

     log.Log.init().info('catch in:' + obj_list['url'])
     resp = Http.Transfer().get(obj_list['url'],
                                encoding=config.runing['coding'])
     if resp.abnormity:
         # Previously a failed fetch produced no output at all.
         log.Log.init().warn(resp.abnormity_reason)
         return

     print(resp.content)
コード例 #6
0
    def list_repositories(self):
        """Return the sorted SSH clone URLs of the organization's repositories.

        Starts at ``<api_url>/repositories/<organization>`` and keeps
        requesting the URL stored under ``next`` in each decoded response
        until a page carries no ``next`` key.  From every entry in a page's
        ``values`` the clone links named ``ssh`` are collected.

        Returns:
            A sorted list of the collected ``href`` values.
        """
        page_url = '%s/repositories/%s' % (self.api_url, self.organization)
        info('[bitbucket] listing repositories')

        ssh_urls = []
        while True:
            # A fresh client is built for every page request.
            client = Http(self.user, self.password)
            page = json.loads(client.get(page_url))

            ssh_urls.extend(
                link['href']
                for repo in page['values']
                for link in repo['links']['clone']
                if link['name'] == 'ssh'
            )

            if 'next' not in page:
                break
            page_url = page['next']

        return sorted(ssh_urls)
コード例 #7
0
 def Catch(self):
     """Fetch the current catalog page and record the books listed on it.

     Reads a catalog record for the cfg held in
     ``config.runStatic['obj_cfg']`` and requests the page stored in its
     ``next_paper`` (falling back to ``org_pager`` with ``{pager}`` replaced
     by ``1`` when ``next_paper`` is empty).  Each node matched by the
     ``CatchBook`` list filter is parsed with ``__analysis_xpath`` and
     passed to ``db.book().check_echo``; every truthy result decrements
     ``config.runing['catalog']['next_count']``.

     Afterwards the stored pager is either advanced (``EndPager`` marker in
     the response, positive ``next_count`` and at least one matched node) or
     reset, in which case ``config.runing['catch_select']`` is set to 1.

     Returns None in every case.
     """
     log.Log.init().info('book.Catch')
     obj_cfg = config.runStatic['obj_cfg']
     obj_catalog = db.catalog().read_one(obj_cfg['id'])
     if not obj_catalog:
         # Everything starts from cfg; without that configuration the steps
         # below cannot run.
         log.Log.init().info('cfg nothing')
         return

     if not config.runing['catalog']:
         # Seed the running counter from the catalog record while the
         # running catalog state is still empty.
         config.runing['catalog']['next_count'] = obj_catalog['next_count']

     sNextpaper = obj_catalog['next_paper']
     iPager = obj_catalog['pager']
     if not sNextpaper:
         sNextpaper = obj_catalog['org_pager'].replace("{pager}", "1")

     resp = Http.Transfer().get(sNextpaper, encoding=obj_cfg['coding'])
     if resp.abnormity:
         # Previously a failed fetch was dropped without any trace; report it
         # the same way the chapter catcher does.
         log.Log.init().warn(resp.abnormity_reason)
         return

     obj_html = etree.HTML(resp.content)
     xp_list_html = obj_html.xpath(
         config.runMode['filter']['CatchBook']['list'])
     # The loop variable gets its own name so it no longer shadows the
     # parsed document held in obj_html.
     for book_node in xp_list_html:
         dic_info_book = self.__analysis_xpath(etree.tostring(book_node))
         is_echo = db.book().check_echo(dic_info_book, obj_cfg['id'],
                                        obj_catalog['id'])
         if is_echo:
             config.runing['catalog']['next_count'] -= 1

     has_next_page = (
         config.runMode['filter']['EndPager'] in resp.content
         and config.runing['catalog']['next_count'] > 0
         and len(xp_list_html) > 0)
     if has_next_page:
         log.Log.init().info(
             'the next page exists. current next_count:' +
             str(config.runing['catalog']['next_count']))
         iPager = iPager + 1
         sNextpaper = obj_catalog['org_pager'].replace(
             "{pager}", str(iPager))
         db.catalog().update_nextpage(obj_catalog['id'], iPager, sNextpaper)
     else:
         # Reset to page 1 and remember the page that was just processed.
         db.catalog().update_nextpage(obj_catalog['id'], 1, '',
                                      last_pager=sNextpaper)
         config.runing['catch_select'] = 1
コード例 #8
0
ファイル: blumpkin.py プロジェクト: thebanon/chreebot
 def __init__(self, settings):
     """Set up the bot's initial state and logger from *settings*.

     Args:
         settings: configuration object; its ``Bot`` attribute is kept as
             ``self.botconfig`` and supplies ``roomname`` and ``debug``.
     """
     self._ws = None
     self.state = BotState(BotState.INITIALIZED)
     self.start_time = time.time()
     self.api = Http()
     self.cm = CogManager()
     self.settings = settings
     self.botconfig = self.settings.Bot
     self.ul = UserList()
     self.room = self.botconfig.roomname
     # The second QuantumLogger argument is 10 in debug mode and 19 otherwise.
     # NOTE(review): presumably a logging level -- confirm against
     # QuantumLogger.
     log_level = 10 if self.settings.Bot.debug else 19
     self.log = QuantumLogger('QuantumJump', log_level)