def mark_executed(self):
    # Drop the current task from storage once it has finished successfully.
    # A task flagged bad (good == False) is deliberately left in place so the
    # failure path (mark_task_bad) can record why it failed.
    if self.curtask:
        if self.curtask.good:  # was `== True`; plain truthiness is the idiom
            self.curtask.delete()
            self.curtask = None
    else:
        # Called without an active task -- presumably a double-ack; log it.
        logger.write_fail("Task DB Storage,strange behavior", task=str(self.curtask))
def mark_task_bad(self):
    # Persist the failure flag and the recorded reason on the current task.
    if self.curtask:
        self.curtask.good = False
        self.curtask.reason = self.curtask.serialized_task.what_bad
        self.curtask.save()
    else:
        # BUG FIX: the original logged id=self.curtask.id here, but this branch
        # only runs when curtask is None/falsy, so that access itself raised
        # AttributeError. Log the (empty) task representation instead.
        logger.write_fail("Task DB Storage,strange behavior", task=str(self.curtask))
def run(self):
    # Drive the task-executing loop. Ctrl-C stops quietly; any other error is
    # logged at this top-level boundary. The holder is always closed.
    try:
        self.tasks_executing_loop()
    except KeyboardInterrupt:
        pass
    except Exception:  # was a bare `except:`, which also swallows SystemExit
        logger.write_fail('global unknown error')
    finally:
        # `finally` guarantees cleanup even for exceptions not handled above.
        self.holder.close()
def downloading_page(self, link):
    # Fetch `link` via get_page(). Network errors are logged and re-raised;
    # a non-200 response is logged but still returned -- the caller decides.
    try:
        page = self.get_page(link)
    except IOError:
        logger.write_fail("SimpleDM, IOError", link=link)
        # BUG FIX: was `raise IOError`, which raised a fresh, empty exception
        # and discarded the original message and traceback. Bare `raise`
        # re-raises the caught exception intact.
        raise
    if page.getcode() != 200:
        logger.write_fail('SimpleDm, code is not 200', link=link, code=page.getcode())
    return page
def run(self): try: while self.loop_condition(): self.tasks_executing_loop() except KeyboardInterrupt: pass except: logger.write_fail("global unknown error") print logger.generate_text("SimpleTaskManager: global error") self.storage.close()
def execute(self):
    # Parse the downloaded entry HTML, extract book info, normalize its
    # download links, and queue a BookSavingTask. Returns True either way:
    # an entry without links is logged and treated as handled.
    entry = BeautifulSoup(self.entry, convertEntities=BeautifulSoup.XML_ENTITIES)
    bookinfo = Retriever.get_bookinfo(entry)
    if not bookinfo:
        logger.write_fail("empty links at entry", entry=entry, link=self.link)
        return True
    bookinfo.pagelink = self.link
    # FIX: the original iterated `for format, link in ...items()`, shadowing
    # the builtin `format` and binding an unused `link` variable. Only the
    # keys are needed; values are rewritten in place.
    for fmt in bookinfo.links:
        bookinfo.links[fmt] = make_correct_link(self.link, bookinfo.links[fmt])
    self.tasks = [BookSavingTask(bookinfo)]
    return True
def get_tag_by_link(link): url = helpers.get_url_from_link(link).strip('/').split('/')[0] if not Retriever.TAGS_MAPPING: dm = WaitingDM() print 'downloading main page of LibRu for retrieving tags...' html = dm.download( helpers.get_site_root_link(link) ) soup = get_soup(html) dirs = Retriever.get_dirs(soup) for link, tag in dirs: Retriever.TAGS_MAPPING[link.strip('/')] = tag if not Retriever.TAGS_MAPPING.has_key(url): #TODO make other way for retrieving of tags for this case logger.write_fail("LibRu parser: can't find tag in main page",link=link, url=url) return None return Retriever.TAGS_MAPPING[url]
def download(self, link):
    # Keep retrying the download, rotating proxies on IOError or HTTP 503,
    # until a non-503 response arrives. Returns (data, page) with the page
    # already closed (callers may still read metadata such as getcode()).
    # NOTE(review): loops forever if the link is permanently unavailable --
    # same known limitation as WaitingDM.downloading_page.
    downloaded = False
    while not downloaded:
        try:
            page = self.opener.open(link)
        except IOError:
            logger.write_fail("ProxyDM, IOError", link=link)
            self.change_proxy()
            continue
        if page.getcode() == 503:
            logger.write_fail('ProxyDM, code is 503', link=link, code=page.getcode())
            # FIX: close the rejected response before retrying -- the original
            # leaked one open connection per 503 round.
            page.close()
            self.change_proxy()
        else:
            downloaded = True
    data = self.read_data(page)
    page.close()
    return (data, page)
def downloading_page(self,link): #TODO process case when link can't available to download at all (NOW: it'll be loop trying to download downloaded = False while not downloaded: try: page = self.get_page(link) except HTTPError as http_error: logger.write_fail("WaitingDM, HTTPError", link=link) raise http_error except IOError: logger.write_fail("WaitingDM, IOError", link=link) self._delaying(TM_WAITING_TIME) continue if page.getcode() == 503: logger.write_fail('WaitingDM, code not 200, waiting', link=link, code=page.getcode()) print 'Error 503,', self._delaying(TM_WAITING_TIME) else: downloaded = True return page
def got_bad_task(self, task): print "BAD TASK!" task.what_bad = logger.generate_text("bad task", task=task) self.storage.mark_task_bad() logger.write_fail("bad task!", task=task)
def add_bad_task(self,task): print '\ngot BAD TASK' task.what_bad = logger.generate_text('bad task', task=task) self.holder.mark_task_bad() logger.write_fail("bad task!", task=task)