Beispiel #1
0
 def mark_executed(self):
     """Finish handling of the current task and clear the ``curtask`` slot.

     When the current task's ``good`` flag equals True its ``delete()`` is
     called; otherwise the task object is left as it is. In both cases
     ``curtask`` is reset to None afterwards. When there is no current task
     a failure is logged and nothing else happens.
     """
     if not self.curtask:
         logger.write_fail("Task DB Storage,strange behavior", task=str(self.curtask))
         return
     finished = self.curtask
     if finished.good == True:
         finished.delete()
     self.curtask = None
Beispiel #2
0
 def mark_task_bad(self):
     """Flag the current task as bad and save it together with the reason.

     Sets ``good`` to False, copies ``serialized_task.what_bad`` into
     ``reason`` and calls ``save()`` on the current task. When there is no
     current task a failure is logged instead.
     """
     if self.curtask:
         self.curtask.good = False
         self.curtask.reason = self.curtask.serialized_task.what_bad
         self.curtask.save()
     else:
         # curtask is falsy in this branch (typically None), so reading ``.id``
         # from it directly would raise AttributeError instead of logging.
         logger.write_fail("Task DB Storage,strange behavior",
                           id=getattr(self.curtask, "id", None))
Beispiel #3
0
 def run(self):
     """Run the task-executing loop once and always close the holder.

     KeyboardInterrupt is swallowed silently. Any other exception is
     reported through the logger and not propagated. ``self.holder.close()``
     runs in every case, including when reporting the failure itself raises.
     """
     try:
         self.tasks_executing_loop()
     except KeyboardInterrupt:
         # interruption by the user is treated as a normal way to stop
         pass
     except:
         # deliberate catch-all: this is the outermost boundary of the run
         logger.write_fail('global unknown error')
     finally:
         # previously skipped whenever the handler above raised
         self.holder.close()
 def downloading_page(self,link):
     """Fetch ``link`` through ``self.get_page`` and return the page object.

     An IOError from ``get_page`` is logged and re-raised. A status code
     other than 200 is logged, but the page is still returned to the caller.
     """
     try:
         page = self.get_page(link)
     except IOError:
         logger.write_fail("SimpleDM, IOError", link=link)
         # bare raise keeps the original exception (message, errno, traceback);
         # ``raise IOError`` replaced it with an empty, unrelated instance
         raise
     status = page.getcode()
     if status != 200:
         logger.write_fail('SimpleDm, code is not 200', link=link, code=status)
     return page
Beispiel #5
0
 def run(self):
     """Execute tasks while ``loop_condition()`` holds, then close the storage.

     KeyboardInterrupt is swallowed silently. Any other exception is logged
     and a generated error text is printed; it is not propagated.
     ``self.storage.close()`` runs in every case, including when reporting
     the failure itself raises.
     """
     try:
         while self.loop_condition():
             self.tasks_executing_loop()
     except KeyboardInterrupt:
         # interruption by the user is treated as a normal way to stop
         pass
     except:
         # deliberate catch-all: this is the outermost boundary of the run
         logger.write_fail("global unknown error")
         print(logger.generate_text("SimpleTaskManager: global error"))
     finally:
         # previously skipped whenever the handler above raised
         self.storage.close()
Beispiel #6
0
 def execute(self):
     """Parse the stored entry and schedule a book-saving task for it.

     The entry is parsed with BeautifulSoup and handed to
     ``Retriever.get_bookinfo``. For a non-empty result the page link is
     recorded, every link in ``bookinfo.links`` is passed through
     ``make_correct_link`` together with ``self.link``, and ``self.tasks``
     is set to a single ``BookSavingTask``. For an empty result a failure is
     logged and ``self.tasks`` is left untouched. Always returns True.
     """
     soup = BeautifulSoup(self.entry, convertEntities=BeautifulSoup.XML_ENTITIES)
     bookinfo = Retriever.get_bookinfo(soup)
     if bookinfo:
         bookinfo.pagelink = self.link
         # values are replaced in place; the set of keys does not change
         for fmt, raw_link in bookinfo.links.items():
             bookinfo.links[fmt] = make_correct_link(self.link, raw_link)
         self.tasks = [BookSavingTask(bookinfo)]
     else:
         logger.write_fail("empty links at entry", entry=soup, link=self.link)
     return True
Beispiel #7
0
 def get_tag_by_link(link):
     """Return the tag registered for ``link``, or None when there is none.

     The lookup key is the first '/'-separated component of
     ``helpers.get_url_from_link(link)`` after stripping surrounding slashes.
     ``Retriever.TAGS_MAPPING`` is filled on demand: while it is empty, the
     site root page is downloaded and the (link, tag) pairs returned by
     ``Retriever.get_dirs`` are stored in it. A missing key is logged and
     None is returned.
     """
     url = helpers.get_url_from_link(link).strip('/').split('/')[0]
     if not Retriever.TAGS_MAPPING:
         dm = WaitingDM()
         print('downloading main page of LibRu for retrieving tags...')
         html = dm.download( helpers.get_site_root_link(link) )
         soup = get_soup(html)
         # the loop variable must not be called ``link``: that rebinds the
         # argument and makes the failure log below report the wrong link
         for dir_link, tag in Retriever.get_dirs(soup):
             Retriever.TAGS_MAPPING[dir_link.strip('/')] = tag
     if url not in Retriever.TAGS_MAPPING:
         #TODO find another way to retrieve the tag in this case
         logger.write_fail("LibRu parser: can't find tag in main page",link=link, url=url)
         return None
     return Retriever.TAGS_MAPPING[url]
 def download(self,link):
     """Download ``link``, switching proxy until a non-503 response arrives.

     An IOError while opening the link, or a 503 status code, is logged and
     followed by ``self.change_proxy()`` and another attempt; there is no
     limit on the number of attempts. The accepted response is read with
     ``self.read_data`` and then closed.

     Returns a ``(data, page)`` tuple; ``page`` is already closed.
     """
     while True:
         try:
             page = self.opener.open(link)
         except IOError:
             logger.write_fail("ProxyDM, IOError", link=link)
             self.change_proxy()
             continue
         status = page.getcode()
         if status != 503:
             break
         logger.write_fail('ProxyDM, code is 503', link=link, code=status)
         # the rejected response is never used again: release it before
         # retrying so its connection does not stay open
         page.close()
         self.change_proxy()
     try:
         data = self.read_data(page)
     finally:
         # close the response even when reading it fails
         page.close()
     return (data, page)
 def downloading_page(self,link):
     """Fetch ``link`` through ``self.get_page``, waiting and retrying on failure.

     An HTTPError is logged and re-raised. Any other IOError, or a 503
     status code, is logged and followed by
     ``self._delaying(TM_WAITING_TIME)`` and another attempt. The first page
     whose status code is not 503 is returned.
     """
     #TODO handle a link that can never be downloaded (currently this retries forever)
     while True:
         try:
             page = self.get_page(link)
         except HTTPError:
             logger.write_fail("WaitingDM, HTTPError", link=link)
             # bare raise keeps the original traceback; under Python 2
             # ``raise http_error`` restarts it at this line
             raise
         except IOError:
             logger.write_fail("WaitingDM, IOError", link=link)
             self._delaying(TM_WAITING_TIME)
             continue
         status = page.getcode()
         if status != 503:
             return page
         logger.write_fail('WaitingDM, code not 200, waiting', link=link, code=status)
         # trailing comma: the output line is left open on purpose
         print 'Error 503,',
         self._delaying(TM_WAITING_TIME)
Beispiel #10
0
 def got_bad_task(self, task):
     """Report ``task`` as bad on stdout, in the storage and in the log.

     The text generated by the logger is stored on the task as ``what_bad``
     before ``self.storage.mark_task_bad()`` is called.
     """
     print("BAD TASK!")
     description = logger.generate_text("bad task", task=task)
     task.what_bad = description
     self.storage.mark_task_bad()
     logger.write_fail("bad task!", task=task)
Beispiel #11
0
 def add_bad_task(self,task):
     """Report ``task`` as bad on stdout, in the holder and in the log.

     The text generated by the logger is stored on the task as ``what_bad``
     before ``self.holder.mark_task_bad()`` is called.
     """
     print('\ngot BAD TASK')
     description = logger.generate_text('bad task', task=task)
     task.what_bad = description
     self.holder.mark_task_bad()
     logger.write_fail("bad task!", task=task)