def get_book(url, proxy=None):
    """Fetch a book's index page, (re)create the Book record and parse its chapter list."""
    print 'Grabbing...', url, time.strftime("%H:%M:%S")
    s = tools.get_url(url, proxy)
    if not s:
        print 'Parsing URL(%s) failed!' % url, time.strftime("%H:%M:%S")
        return
    mod = get_module(url)
    name = mod.parse_title(s)
    try:
        # Re-grabbing an existing book: reset its state and drop the old chapters.
        book = Book.objects.get(name=name)
        book.status = 1
        book.url = url
        book.message = ''
        book.save()
        book.chapter_set.all().delete()
    except Book.DoesNotExist:
        book = Book.objects.create(name=name, url=url)
    try:
        parse_index(url, book, s, proxy)
    except Exception, e:
        traceback.print_exc()
        book.message = str(e)
        return False, str(e)
    finally:
        # Mark the book finished and record how many chapters actually got content.
        book.status = 2
        book.finished = book.chapter_set.filter(size__gt=0).count()
        book.save()
    print 'successful!', time.strftime("%H:%M:%S")
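
# Hedged usage sketch (not part of the original module): one way get_book might be
# driven for a batch of index URLs. The URL list and the proxy value are assumptions;
# get_book itself is defined above and returns (False, message) only on a parse error.
def grab_books(urls, proxy=None):
    failed = []
    for url in urls:
        result = get_book(url, proxy)
        if result and result[0] is False:
            failed.append((url, result[1]))
    return failed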
def parse_page(book_id, id, url, title, lock, proxy):
    """Fetch one chapter page and store its content (in the DB or as a text file)."""
    print 'Getting...', url
    mod = get_module(url)
    chapter = Chapter.objects.get(pk=id)
    text = tools.get_url(url.encode('utf-8'), proxy)
    content = mod.parse_page(title, text)
    if settings.CONTENT == 'db':
        chapter.content = content
    else:
        # File-backed storage: one zero-padded text file per chapter.
        path = os.path.join(settings.MEDIA_ROOT, 'zipbooks', str(book_id))
        if not os.path.exists(path):
            os.makedirs(path)
        filename = os.path.join(path, "%04d.txt" % chapter.order)
        try:
            with open(filename, 'wb') as f:
                f.write(content)
        except Exception:
            traceback.print_exc()
            print 'Getting...', url, 'Failed'
            return
    # The lock serialises chapter saves when several workers run in parallel.
    lock.acquire()
    try:
        chapter.size = len(content)
        chapter.save()
    finally:
        lock.release()
    print 'Getting...', url, 'Done'
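
# Hedged usage sketch (an assumption, not the project's own runner): parse_page takes an
# explicit lock, which suggests it is meant to run on several workers at once. This shows
# one possible thread-pool driver; the (chapter_pk, url, title) tuples are assumed to be
# supplied by the caller rather than read from any particular model field.
import threading
from multiprocessing.dummy import Pool  # thread-backed Pool from the standard library

def grab_chapters(book_id, chapters, proxy=None, workers=4):
    """chapters: iterable of (chapter_pk, url, title) tuples."""
    lock = threading.Lock()
    pool = Pool(workers)
    for pk, url, title in chapters:
        pool.apply_async(parse_page, (book_id, pk, url, title, lock, proxy))
    pool.close()
    pool.join()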