예제 #1
0
def get_urls_from_text(data,configuration=None,normalize=False):
    """Scan a block of plain text for story URLs.

    data: text to scan (coerced to unicode; decoded as utf8 when called
        outside calibre).
    configuration: optional Configuration; defaults to a lightweight
        dummy one.
    normalize: when true, return the adapter-normalized storyUrls;
        otherwise return the longest raw href seen for each story.
    Returns a list of URL strings.
    """
    urls = collections.OrderedDict()
    try:
        data = unicode(data)
    except UnicodeDecodeError:
        data=data.decode('utf8') ## for when called outside calibre.

    if not configuration:
        configuration = Configuration(["test1.com"],"EPUB",lightweight=True)

    for href in re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', data):
        # this (should) catch normal story links, some javascript
        # 'are you old enough' links, and 'Report This' links.
        if 'story.php' in href:
            m = re.search(r"(?P<sid>(view)?story\.php\?(sid|psid|no|story|stid)=\d+)",href)
            if m is not None:
                href = form_url(href,m.group('sid'))
        try:
            href = href.replace('&index=1','')
            adapter = adapters.getAdapter(configuration,href)
            if adapter.story.getMetadata('storyUrl') not in urls:
                urls[adapter.story.getMetadata('storyUrl')] = [href]
            else:
                urls[adapter.story.getMetadata('storyUrl')].append(href)
        except Exception:
            # No adapter recognizes this URL -- skip it.  (Was a bare
            # 'except:', which also swallowed KeyboardInterrupt/SystemExit.)
            pass

    # Simply return the longest URL with the assumption that it contains the
    # most user readable metadata, if not normalized
    return urls.keys() if normalize else [max(value, key=len) for key, value in urls.items()]
예제 #2
0
def fetch_metadata(url: str, chapters=True) -> dict:
    """Fetch all story metadata for *url* and return it as a dict.

    chapters: when true, also fetch the chapter list and store it under
        'zchapters' as (1-based index, chapter) tuples.

    NOTE: the original signature annotated the return as ``bytes``, but
    getAllMetadata() yields a metadata dict; annotation corrected.
    """
    configuration = Configuration(adapters.getConfigSectionsFor(url), 'epub')
    adapter = adapters.getAdapter(configuration, url)
    # Bypass any site 'adult content' interstitial pages.
    adapter.is_adult = True
    metadata = adapter.getStoryMetadataOnly().getAllMetadata()

    if chapters:
        metadata['zchapters'] = [(i + 1, chap)
                                 for i, chap in enumerate(adapter.get_chapters())]

    return metadata
예제 #3
0
 def __init__(self, url):
     """Resolve *url* (or another Target's url) into a site abbreviation
     and story id via a throwaway FanFicFare adapter.

     Raises NotAValidTarget when no adapter recognizes the URL.
     """
     if isinstance(url, Target):
         url = url.url
     self.url = url
     config = Configuration(["test1.com"], "HTML", lightweight=True)
     try:
         site_adapter = adapters.getAdapter(config, url)
     except UnknownSite:
         raise NotAValidTarget(url)
     abbrev = site_adapter.story.getMetadata("siteabbrev")
     self.abbrev = "unknown" if abbrev is None else abbrev
     story_id = site_adapter.story.getMetadata("storyId")
     self.id = self._id_from_url(url) if story_id is None else story_id
예제 #4
0
def get_urls_from_page(url,configuration=None,normalize=False):
    """Fetch *url* and harvest story URLs from the resulting HTML.

    Delegates the actual link scraping to get_urls_from_html(); this
    function only deals with fetching the page, including per-site
    login/adult-check workarounds.
    """
    if not configuration:
        configuration = Configuration(["test1.com"],"EPUB",lightweight=True)

    data = None
    try:
        adapter = adapters.getAdapter(configuration,url,anyurl=True)

        # archiveofourown.org is unusual: it hides 'adult' stories
        # entirely unless logged in (most sites show but protect them).
        # Only works with a saved user/pass -- no prompting for lists.
        if 'archiveofourown.org' in url and adapter.getConfig("username"):
            if adapter.getConfig("is_adult"):
                addurl = "&view_adult=true" if '?' in url else "?view_adult=true"
            else:
                addurl = ""
            # first fetch is just to obtain an authenticity_token,
            data = adapter._fetchUrl(url+addurl)
            # then log the session in.
            adapter.performLogin(url,data)

        if 'fimfiction.net' in url and adapter.getConfig("is_adult"):
            data = adapter._fetchUrl(url)
            adapter.set_adult_cookie()

        # Fetch through the adapter so User-Agent and other special
        # settings apply (and, for AO3, the logged-in session).
        data = adapter._fetchUrl(url,usecache=False)
    except UnknownSite:
        # No adapter even with anyurl=True: some random site, fetch raw.
        opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor())
        data = opener.open(url).read()

    # kludge because it hasn't shown up on enough sites to be worth
    # generalizing yet.
    restrictsearch = ('div',{'id':'mainpage'}) if 'scarvesandcoffee.net' in url else None

    return get_urls_from_html(data,url,configuration,normalize,restrictsearch)
예제 #5
0
 def getNormalStoryURLSite(url):
     """Return (normalized url, site domain, storyId) for *url*, or None
     when no adapter accepts it.

     Lazily caches a lightweight dummy Configuration on the
     adapters.getNormalStoryURL function object.
     """
     # print("getNormalStoryURLSite:%s"%url)
     # BUG FIX: original read 'adapters.gerNormalStoryURL' (typo), which
     # raised AttributeError before the cached config was ever consulted.
     if not adapters.getNormalStoryURL.__dummyconfig:
         adapters.getNormalStoryURL.__dummyconfig = Configuration(
             ["test1.com"], "EPUB", lightweight=True)
     # pulling up an adapter is pretty low over-head.  If
     # it fails, it's a bad url.
     try:
         adapter = adapters.getAdapter(
             adapters.getNormalStoryURL.__dummyconfig, url)
         url = adapter.url
         site = adapter.getSiteDomain()
         storyid = adapter.story.getMetadata('storyId')
         del adapter
         return (url, site, storyid)
     except Exception:
         # Bad/unsupported URL.  (Was a bare 'except:', which also hid
         # KeyboardInterrupt/SystemExit.)
         return None
예제 #6
0
def get_urls_from_html(data,url=None,configuration=None,normalize=False,restrictsearch=None):
    """Collect story URLs from the <a> tags of an HTML page.

    data: HTML text, parsed with html5lib.
    url: base URL used by form_url() to absolutize relative hrefs.
    configuration: optional Configuration; defaults to a lightweight
        dummy one.
    restrictsearch: optional (name, attrs) pair; limits the scan to the
        first matching element of the soup.
    Builds an OrderedDict keyed by adapter-normalized storyUrl, each
    value a list of the raw hrefs that resolved to that story.
    """
    urls = collections.OrderedDict()

    if not configuration:
        configuration = Configuration(["test1.com"],"EPUB",lightweight=True)

    soup = BeautifulSoup(data,"html5lib")
    if restrictsearch:
        soup = soup.find(*restrictsearch)
        #logger.debug("restrict search:%s"%soup)

    for a in soup.findAll('a'):
        if a.has_attr('href'):
            #logger.debug("a['href']:%s"%a['href'])
            href = form_url(url,a['href'])
            #logger.debug("1 urlhref:%s"%href)
            # this (should) catch normal story links, some javascript
            # 'are you old enough' links, and 'Report This' links.
            if 'story.php' in a['href']:
                #logger.debug("trying:%s"%a['href'])
                m = re.search(r"(?P<sid>(view)?story\.php\?(sid|psid|no|story|stid)=\d+)",a['href'])
                if m != None:
                    # Use the href itself as the base only when it is
                    # already absolute ('//' present); else fall back to url.
                    href = form_url(a['href'] if '//' in a['href'] else url,
                                    m.group('sid'))

            try:
                href = href.replace('&index=1','')
                #logger.debug("2 urlhref:%s"%href)
                adapter = adapters.getAdapter(configuration,href)
                #logger.debug("found adapter")
                if adapter.story.getMetadata('storyUrl') not in urls:
                    urls[adapter.story.getMetadata('storyUrl')] = [href]
                else:
                    urls[adapter.story.getMetadata('storyUrl')].append(href)
            except Exception, e:
                # Python 2 except syntax.  Any href no adapter accepts is
                # silently skipped.
                #logger.debug e
                pass
    # NOTE(review): no return statement is visible here -- the snippet
    # appears truncated.  The sibling get_urls_from_text() returns
    # urls.keys() (normalize) or the longest href per story at this point.
예제 #7
0
    if options.force:
        configuration.set('overrides', 'always_overwrite', 'true')

    if options.options:
        for opt in options.options:
            (var, val) = opt.split('=')
            configuration.set('overrides', var, val)

    if options.list or options.normalize:
        retlist = get_urls_from_page(url, configuration, normalize=options.normalize)
        return '\n'.join(retlist)
        

    try:
        adapter = adapters.getAdapter(configuration, url)
        #adapter.setChaptersRange(options.begin, options.end)
        # three tries, that's enough if both user/pass & is_adult needed,
        # or a couple tries of one or the other     
        for x in range(0, 2):
            try:
                print('XXX in two-1')
                adapter.getStoryMetadataOnly()
                print('XXX in two-2')
            except exceptions.FailedToLogin, f:
                if f.passwdonly:
                    print 'Story requires a password.'
                else:
                    print 'Login Failed, Need Username/Password.'
                    sys.stdout.write('Username: ')
                    adapter.username = sys.stdin.readline().strip()
예제 #8
0
File: main.py  Project: JimmXinu/FanFicFare
    def post(self):
        """Task-queue worker ('/fdowntask'): download a story, render it
        with the requested writer, and store the output in ~1MB
        (individually compressed) DownloadData chunks on the record.

        Request params: id (key of the record created by /fdown), user
        (email address only -- a User object can't pass through the
        queue), format, url, login, password, is_adult, email.  Any
        failure is recorded on the download record rather than raised,
        so the task is not retried forever.
        """
        logging.getLogger().setLevel(logging.DEBUG)
        fileId = self.request.get('id')
        # User object can't pass, just email address
        user = users.User(self.request.get('user'))
        format = self.request.get('format')
        url = self.request.get('url')
        login = self.request.get('login')
        password = self.request.get('password')
        is_adult = self.request.get('is_adult')
        email = self.request.get('email')

        # NOTE(review): this log line arrived corrupted/redacted in the
        # original source ('"..."******"..."' is a syntax error);
        # reconstructed as url + user email + record id -- confirm.
        logging.info("Downloading: " + url + " for user: " + user.email() + " ID: " + fileId)

        adapter = None

        # use existing record if available.
        # fileId should have record from /fdown.
        download = getDownloadMeta(id=fileId,url=url,user=user,format=format,new=True)
        # Clear any data chunks left over from a previous attempt.
        for chunk in download.data_chunks:
            chunk.delete()
        download.put()

        logging.info('Creating adapter...')

        try:
            configuration = self.getUserConfig(user,url,format)
            adapter = adapters.getAdapter(configuration,url)
            adapter.setChaptersRange(download.ch_begin,download.ch_end)

            logging.info('Created an adapter: %s' % adapter)

            if login or password:
                adapter.username=login
                adapter.password=password
            # NOTE(review): is_adult here is the raw request string, not a
            # parsed boolean -- even "False" would be truthy.  Confirm the
            # enqueuing side always sends a usable value.
            adapter.is_adult=is_adult

            # adapter.getStory() is what does all the heavy lifting.
            # adapter.getStoryMetadataOnly() only fetches enough to
            # get metadata.  writer.writeStory() will call
            # adapter.getStory(), too.
            writer = writers.getWriter(format,configuration,adapter)
            download.name = writer.getOutputFileName()
            logging.debug('getOutputFileName:'+writer.getOutputFileName())
            download.title = adapter.getStory().getMetadata('title')
            download.author = adapter.getStory().getMetadata('author')
            download.url = adapter.getStory().getMetadata('storyUrl')
            download.put()

            allmeta = adapter.getStory().getAllMetadata(removeallentities=True,doreplacements=False)

            # Render the story into memory, then release the heavyweight
            # writer/adapter objects before chunking the output.
            outbuffer = StringIO()
            writer.writeStory(outbuffer)
            data = outbuffer.getvalue()
            outbuffer.close()
            del outbuffer
            del writer
            del adapter

            # epubs are all already compressed.  Each chunk is
            # compressed individually to avoid having to hold the
            # whole in memory just for the compress/uncompress.
            if format != 'epub':
                def compress(data):
                    return zlib.compress(data)
            else:
                def compress(data):
                    return data

            # delete existing chunks first
            for chunk in download.data_chunks:
                chunk.delete()

            # Store the rendered story in 1MB slices (datastore entity
            # size limit).
            index=0
            while( len(data) > 0 ):
                DownloadData(download=download,
                             index=index,
                             blob=compress(data[:1000000])).put()
                index += 1
                data = data[1000000:]
            download.completed=True
            download.put()

            # Update (or create) the cached metadata record and bump its
            # download count.
            smetal = SavedMeta.all().filter('url =', allmeta['storyUrl'] ).fetch(1)
            if smetal and smetal[0]:
                smeta = smetal[0]
                smeta.count += 1
            else:
                smeta=SavedMeta()
                smeta.count = 1

            smeta.url = allmeta['storyUrl']
            smeta.title = allmeta['title']
            smeta.author = allmeta['author']
            smeta.meta = allmeta
            smeta.date = datetime.datetime.now()
            smeta.put()

            logging.info("Download finished OK")
            del data

        except Exception as e:
            # Record the failure on the download record for the UI to
            # report; return normally so the queue does not retry.
            logging.exception(e)
            download.failure = unicode(e)
            download.put()
            return

        return
예제 #9
0
File: main.py  Project: JimmXinu/FanFicFare
    def post(self):
        """Handle the download-form POST: validate the URL (with optional
        [begin-end]/[begin,end] chapter range), pre-fetch story metadata
        so bad URLs/logins fail fast, then enqueue the real download on
        the 'download' task queue.  Login/adult-check failures re-render
        login.html so the user can supply credentials.
        """
        logging.getLogger().setLevel(logging.DEBUG)
        user = users.get_current_user()
        if not user:
            self.redirect(users.create_login_url(self.request.uri))
            return

        format = self.request.get('format')
        url = self.request.get('url')

        if not url or url.strip() == "":
            self.redirect('/')
            return

        # Allow chapter range with URL.
        # like test1.com?sid=5[4-6] or [4,6]
        url,ch_begin,ch_end = adapters.get_url_chapter_range(url)

        logging.info("Queuing Download: %s" % url)
        login = self.request.get('login')
        password = self.request.get('password')
        is_adult = self.request.get('is_adult') == "on"
        email = self.request.get('email')

        # use existing record if available.  Fetched/Created before
        # the adapter can normalize the URL in case we need to record
        # an exception.
        download = getDownloadMeta(url=url,user=user,format=format,new=True)

        adapter = None
        try:
            try:
                configuration = self.getUserConfig(user,url,format)
            except exceptions.UnknownSite:
                self.redirect("/?error=custom&errtext=%s"%urllib.quote("Unsupported site in URL (%s).  See 'Support sites' list below."%url,''))
                return
            except Exception as e:
                self.redirect("/?error=custom&errtext=%s"%urllib.quote("There's an error in your User Configuration: "+unicode(e),'')[:2048]) # limited due to Location header length limit.
                return

            adapter = adapters.getAdapter(configuration,url)
            adapter.setChaptersRange(ch_begin,ch_end)
            logging.info('Created an adaper: %s' % adapter)

            if login or password:
                adapter.username=login
                adapter.password=password
            adapter.is_adult=is_adult

            ## This scrapes the metadata, which will be
            ## duplicated in the queue task, but it
            ## detects bad URLs, bad login, bad story, etc
            ## without waiting for the queue.  So I think
            ## it's worth the double up.  Could maybe save
            ## it all in the download object someday.
            story = adapter.getStoryMetadataOnly()

            ## Fetch again using normalized story URL.  The one
            ## fetched/created above, if different, will not be saved.
            download = getDownloadMeta(url=story.getMetadata('storyUrl'),
                                       user=user,format=format,new=True)

            download.title = story.getMetadata('title')
            download.author = story.getMetadata('author')
            download.url = story.getMetadata('storyUrl')
            download.ch_begin = ch_begin
            download.ch_end = ch_end
            download.put()

            taskqueue.add(url='/fdowntask',
                      queue_name="download",
                          params={'id':unicode(download.key()),
                                  'format':format,
                                  'url':download.url,
                                  'login':login,
                                  'password':password,
                                  'user':user.email(),
                                  'email':email,
                                  'is_adult':is_adult})

            logging.info("enqueued download key: " + unicode(download.key()))

        except (exceptions.FailedToLogin,exceptions.AdultCheckRequired), e:
            # Python 2 'except ..., e' syntax.  Expected failures that
            # need user interaction: record them and show the login page.
            download.failure = unicode(e)
            download.put()
            logging.info(unicode(e))
            is_login= ( isinstance(e, exceptions.FailedToLogin) )
            is_passwdonly = is_login and e.passwdonly
            template_values = dict(nickname = user.nickname(),
                                   url = url,
                                   format = format,
                                   site = adapter.getConfigSection(),
                                   fic = download,
                                   is_login=is_login,
                                   is_passwdonly=is_passwdonly
                                   )
            # thewriterscoffeeshop.com can do adult check *and* user required.
            if isinstance(e,exceptions.AdultCheckRequired):
                template_values['login']=login
                template_values['password']=password

            path = os.path.join(os.path.dirname(__file__), 'login.html')
            self.response.out.write(template.render(path, template_values))
            return
예제 #10
0
File: main.py  Project: gcomyn/FanFicFare
    def post(self):
        logging.getLogger().setLevel(logging.DEBUG)
        user = users.get_current_user()
        if not user:
            self.redirect(users.create_login_url(self.request.uri))
            return

        format = self.request.get('format')
        url = self.request.get('url')

        if not url or url.strip() == "":
            self.redirect('/')
            return

        # Allow chapter range with URL.
        # test1.com?sid=5[4-6]
        mc = re.match(r"^(?P<url>.*?)(?:\[(?P<begin>\d+)?(?P<comma>[,-])?(?P<end>\d+)?\])?$",url)
        #print("url:(%s) begin:(%s) end:(%s)"%(mc.group('url'),mc.group('begin'),mc.group('end')))
        url = mc.group('url')
        ch_begin = mc.group('begin')
        ch_end = mc.group('end')
        if ch_begin and not mc.group('comma'):
            ch_end = ch_begin

        logging.info("Queuing Download: %s" % url)
        login = self.request.get('login')
        password = self.request.get('password')
        is_adult = self.request.get('is_adult') == "on"

        # use existing record if available.  Fetched/Created before
        # the adapter can normalize the URL in case we need to record
        # an exception.
        download = getDownloadMeta(url=url,user=user,format=format,new=True)

        adapter = None
        try:
            try:
                configuration = self.getUserConfig(user,url,format)
            except exceptions.UnknownSite:
                self.redirect("/?error=custom&errtext=%s"%urllib.quote("Unsupported site in URL (%s).  See 'Support sites' list below."%url,''))
                return
            except Exception, e:
                self.redirect("/?error=custom&errtext=%s"%urllib.quote("There's an error in your User Configuration: "+unicode(e),'')[:2048]) # limited due to Locatton header length limit.
                return

            adapter = adapters.getAdapter(configuration,url)
            adapter.setChaptersRange(ch_begin,ch_end)
            logging.info('Created an adaper: %s' % adapter)

            if login or password:
                adapter.username=login
                adapter.password=password
            adapter.is_adult=is_adult

            ## This scrapes the metadata, which will be
            ## duplicated in the queue task, but it
            ## detects bad URLs, bad login, bad story, etc
            ## without waiting for the queue.  So I think
            ## it's worth the double up.  Could maybe save
            ## it all in the download object someday.
            story = adapter.getStoryMetadataOnly()

            ## Fetch again using normalized story URL.  The one
            ## fetched/created above, if different, will not be saved.
            download = getDownloadMeta(url=story.getMetadata('storyUrl'),
                                       user=user,format=format,new=True)

            download.title = story.getMetadata('title')
            download.author = story.getMetadata('author')
            download.url = story.getMetadata('storyUrl')
            download.ch_begin = ch_begin
            download.ch_end = ch_end
            download.put()

            taskqueue.add(url='/fdowntask',
                      queue_name="download",
                          params={'id':unicode(download.key()),
                                  'format':format,
                                  'url':download.url,
                                  'login':login,
                                  'password':password,
                                  'user':user.email(),
                                  'is_adult':is_adult})

            logging.info("enqueued download key: " + unicode(download.key()))
예제 #11
0
    def post(self):
        logging.getLogger().setLevel(logging.DEBUG)
        user = users.get_current_user()
        if not user:
            self.redirect(users.create_login_url(self.request.uri))
            return

        format = self.request.get('format')
        url = self.request.get('url')

        if not url or url.strip() == "":
            self.redirect('/')
            return

        logging.info("Queuing Download: %s" % url)
        login = self.request.get('login')
        password = self.request.get('password')
        is_adult = self.request.get('is_adult') == "on"

        # use existing record if available.  Fetched/Created before
        # the adapter can normalize the URL in case we need to record
        # an exception.
        download = getDownloadMeta(url=url,user=user,format=format,new=True)

        adapter = None
        try:
            try:
                configuration = self.getUserConfig(user,url,format)
            except Exception, e:
                self.redirect("/?error=custom&errtext=%s"%urlEscape("There's an error in your User Configuration: "+str(e)))
                return

            adapter = adapters.getAdapter(configuration,url)
            logging.info('Created an adaper: %s' % adapter)

            if login or password:
                adapter.username=login
                adapter.password=password
            adapter.is_adult=is_adult

            ## This scrapes the metadata, which will be
            ## duplicated in the queue task, but it
            ## detects bad URLs, bad login, bad story, etc
            ## without waiting for the queue.  So I think
            ## it's worth the double up.  Could maybe save
            ## it all in the download object someday.
            story = adapter.getStoryMetadataOnly()

            ## Fetch again using normalized story URL.  The one
            ## fetched/created above, if different, will not be saved.
            download = getDownloadMeta(url=story.getMetadata('storyUrl'),
                                       user=user,format=format,new=True)

            download.title = story.getMetadata('title')
            download.author = story.getMetadata('author')
            download.url = story.getMetadata('storyUrl')
            download.put()

            taskqueue.add(url='/fdowntask',
                      queue_name="download",
                          params={'id':str(download.key()),
                                  'format':format,
                                  'url':download.url,
                                  'login':login,
                                  'password':password,
                                  'user':user.email(),
                                  'is_adult':is_adult})

            logging.info("enqueued download key: " + str(download.key()))
예제 #12
0
def get_fff_adapter(url,fileform="epub",personalini=None):
    """Build and return a FanFicFare adapter for *url*, using a config
    assembled by get_fff_config() for the given output format and
    optional personal ini text."""
    config = get_fff_config(url,fileform,personalini)
    return adapters.getAdapter(config,url)
예제 #13
0
File: main.py  Project: MegMM/FanFicFare
    def post(self):
        """Handle the download-form POST: validate the URL (with optional
        [begin-end]/[begin,end] chapter range), pre-fetch story metadata
        so bad URLs/logins fail fast, then enqueue the real download on
        the 'download' task queue.  Login/adult-check failures re-render
        login.html so the user can supply credentials.
        """
        logging.getLogger().setLevel(logging.DEBUG)
        user = users.get_current_user()
        if not user:
            self.redirect(users.create_login_url(self.request.uri))
            return

        format = self.request.get('format')
        url = self.request.get('url')

        if not url or url.strip() == "":
            self.redirect('/')
            return

        # Allow chapter range with URL.
        # like test1.com?sid=5[4-6] or [4,6]
        url, ch_begin, ch_end = adapters.get_url_chapter_range(url)

        logging.info("Queuing Download: %s" % url)
        login = self.request.get('login')
        password = self.request.get('password')
        is_adult = self.request.get('is_adult') == "on"
        email = self.request.get('email')

        # use existing record if available.  Fetched/Created before
        # the adapter can normalize the URL in case we need to record
        # an exception.
        download = getDownloadMeta(url=url, user=user, format=format, new=True)

        adapter = None
        try:
            try:
                configuration = self.getUserConfig(user, url, format)
            except exceptions.UnknownSite:
                self.redirect("/?error=custom&errtext=%s" % urllib.quote(
                    "Unsupported site in URL (%s).  See 'Support sites' list below."
                    % url, ''))
                return
            except Exception as e:
                self.redirect("/?error=custom&errtext=%s" % urllib.quote(
                    "There's an error in your User Configuration: " +
                    unicode(e),
                    '')[:2048])  # limited due to Location header length limit.
                return

            adapter = adapters.getAdapter(configuration, url)
            adapter.setChaptersRange(ch_begin, ch_end)
            logging.info('Created an adaper: %s' % adapter)

            if login or password:
                adapter.username = login
                adapter.password = password
            adapter.is_adult = is_adult

            ## This scrapes the metadata, which will be
            ## duplicated in the queue task, but it
            ## detects bad URLs, bad login, bad story, etc
            ## without waiting for the queue.  So I think
            ## it's worth the double up.  Could maybe save
            ## it all in the download object someday.
            story = adapter.getStoryMetadataOnly()

            ## Fetch again using normalized story URL.  The one
            ## fetched/created above, if different, will not be saved.
            download = getDownloadMeta(url=story.getMetadata('storyUrl'),
                                       user=user,
                                       format=format,
                                       new=True)

            download.title = story.getMetadata('title')
            download.author = story.getMetadata('author')
            download.url = story.getMetadata('storyUrl')
            download.ch_begin = ch_begin
            download.ch_end = ch_end
            download.put()

            taskqueue.add(url='/fdowntask',
                          queue_name="download",
                          params={
                              'id': unicode(download.key()),
                              'format': format,
                              'url': download.url,
                              'login': login,
                              'password': password,
                              'user': user.email(),
                              'email': email,
                              'is_adult': is_adult
                          })

            logging.info("enqueued download key: " + unicode(download.key()))

        except (exceptions.FailedToLogin, exceptions.AdultCheckRequired), e:
            # Python 2 'except ..., e' syntax.  Expected failures that
            # need user interaction: record them and show the login page.
            download.failure = unicode(e)
            download.put()
            logging.info(unicode(e))
            is_login = (isinstance(e, exceptions.FailedToLogin))
            is_passwdonly = is_login and e.passwdonly
            template_values = dict(nickname=user.nickname(),
                                   url=url,
                                   format=format,
                                   site=adapter.getConfigSection(),
                                   fic=download,
                                   is_login=is_login,
                                   is_passwdonly=is_passwdonly)
            # thewriterscoffeeshop.com can do adult check *and* user required.
            if isinstance(e, exceptions.AdultCheckRequired):
                template_values['login'] = login
                template_values['password'] = password

            path = os.path.join(os.path.dirname(__file__), 'login.html')
            self.response.out.write(template.render(path, template_values))
            return
예제 #14
0
File: main.py  Project: k3sava/FanFicFare
    def post(self):
        logging.getLogger().setLevel(logging.DEBUG)
        user = users.get_current_user()
        if not user:
            self.redirect(users.create_login_url(self.request.uri))
            return

        format = self.request.get('format')
        url = self.request.get('url')

        if not url or url.strip() == "":
            self.redirect('/')
            return

        # Allow chapter range with URL.
        # test1.com?sid=5[4-6]
        mc = re.match(
            r"^(?P<url>.*?)(?:\[(?P<begin>\d+)?(?P<comma>[,-])?(?P<end>\d+)?\])?$",
            url)
        #print("url:(%s) begin:(%s) end:(%s)"%(mc.group('url'),mc.group('begin'),mc.group('end')))
        url = mc.group('url')
        ch_begin = mc.group('begin')
        ch_end = mc.group('end')
        if ch_begin and not mc.group('comma'):
            ch_end = ch_begin

        logging.info("Queuing Download: %s" % url)
        login = self.request.get('login')
        password = self.request.get('password')
        is_adult = self.request.get('is_adult') == "on"

        # use existing record if available.  Fetched/Created before
        # the adapter can normalize the URL in case we need to record
        # an exception.
        download = getDownloadMeta(url=url, user=user, format=format, new=True)

        adapter = None
        try:
            try:
                configuration = self.getUserConfig(user, url, format)
            except Exception, e:
                self.redirect(
                    "/?error=custom&errtext=%s" %
                    urlEscape("There's an error in your User Configuration: " +
                              unicode(e)))
                return

            adapter = adapters.getAdapter(configuration, url)
            adapter.setChaptersRange(ch_begin, ch_end)
            logging.info('Created an adaper: %s' % adapter)

            if login or password:
                adapter.username = login
                adapter.password = password
            adapter.is_adult = is_adult

            ## This scrapes the metadata, which will be
            ## duplicated in the queue task, but it
            ## detects bad URLs, bad login, bad story, etc
            ## without waiting for the queue.  So I think
            ## it's worth the double up.  Could maybe save
            ## it all in the download object someday.
            story = adapter.getStoryMetadataOnly()

            ## Fetch again using normalized story URL.  The one
            ## fetched/created above, if different, will not be saved.
            download = getDownloadMeta(url=story.getMetadata('storyUrl'),
                                       user=user,
                                       format=format,
                                       new=True)

            download.title = story.getMetadata('title')
            download.author = story.getMetadata('author')
            download.url = story.getMetadata('storyUrl')
            download.ch_begin = ch_begin
            download.ch_end = ch_end
            download.put()

            taskqueue.add(url='/fdowntask',
                          queue_name="download",
                          params={
                              'id': unicode(download.key()),
                              'format': format,
                              'url': download.url,
                              'login': login,
                              'password': password,
                              'user': user.email(),
                              'is_adult': is_adult
                          })

            logging.info("enqueued download key: " + unicode(download.key()))
# ---- Example #15 (code-sample separator from the scrape; "예제" = "example") ----
    def post(self):
        """Handle the /fdown form POST: validate the request and queue a download.

        Parses the story URL (with an optional ``[begin-end]`` chapter
        range suffix), creates/looks up the DownloadMeta record, fetches
        the story metadata once up front so bad URLs/logins fail fast,
        then enqueues the real download on the "download" task queue.

        NOTE(review): the pasted source is truncated here -- the outer
        ``try:`` below has no visible except/finally clause before the
        next sample separator; the handler's error path is missing.
        """
        logging.getLogger().setLevel(logging.DEBUG)
        # Require a signed-in user; bounce anonymous visitors to login.
        user = users.get_current_user()
        if not user:
            self.redirect(users.create_login_url(self.request.uri))
            return

        format = self.request.get("format")
        url = self.request.get("url")

        # Empty/blank URL: nothing to do, back to the front page.
        if not url or url.strip() == "":
            self.redirect("/")
            return

        # Allow chapter range with URL.
        # test1.com?sid=5[4-6]
        mc = re.match(r"^(?P<url>.*?)(?:\[(?P<begin>\d+)?(?P<comma>[,-])?(?P<end>\d+)?\])?$", url)
        # print("url:(%s) begin:(%s) end:(%s)"%(mc.group('url'),mc.group('begin'),mc.group('end')))
        url = mc.group("url")
        ch_begin = mc.group("begin")
        ch_end = mc.group("end")
        # "[4]" (no comma/dash) means the single chapter 4.
        if ch_begin and not mc.group("comma"):
            ch_end = ch_begin

        logging.info("Queuing Download: %s" % url)
        login = self.request.get("login")
        password = self.request.get("password")
        is_adult = self.request.get("is_adult") == "on"

        # use existing record if available.  Fetched/Created before
        # the adapter can normalize the URL in case we need to record
        # an exception.
        download = getDownloadMeta(url=url, user=user, format=format, new=True)

        adapter = None
        try:
            try:
                configuration = self.getUserConfig(user, url, format)
            # Python 2-only except syntax; report config errors back to the UI.
            except Exception, e:
                self.redirect(
                    "/?error=custom&errtext=%s" % urlEscape("There's an error in your User Configuration: " + str(e))
                )
                return

            adapter = adapters.getAdapter(configuration, url)
            adapter.setChaptersRange(ch_begin, ch_end)
            logging.info("Created an adaper: %s" % adapter)

            # Only override adapter credentials when the form supplied some.
            if login or password:
                adapter.username = login
                adapter.password = password
            adapter.is_adult = is_adult

            ## This scrapes the metadata, which will be
            ## duplicated in the queue task, but it
            ## detects bad URLs, bad login, bad story, etc
            ## without waiting for the queue.  So I think
            ## it's worth the double up.  Could maybe save
            ## it all in the download object someday.
            story = adapter.getStoryMetadataOnly()

            ## Fetch again using normalized story URL.  The one
            ## fetched/created above, if different, will not be saved.
            download = getDownloadMeta(url=story.getMetadata("storyUrl"), user=user, format=format, new=True)

            download.title = story.getMetadata("title")
            download.author = story.getMetadata("author")
            download.url = story.getMetadata("storyUrl")
            download.ch_begin = ch_begin
            download.ch_end = ch_end
            download.put()

            # Hand the actual download off to the /fdowntask worker; only
            # the user's email can cross the task boundary, not the User.
            taskqueue.add(
                url="/fdowntask",
                queue_name="download",
                params={
                    "id": str(download.key()),
                    "format": format,
                    "url": download.url,
                    "login": login,
                    "password": password,
                    "user": user.email(),
                    "is_adult": is_adult,
                },
            )

            logging.info("enqueued download key: " + str(download.key()))
# ---- Example #16 -- file: main.py, project: k3sava/FanFicFare (scraper separator) ----
    def post(self):
        """Task-queue worker for /fdowntask: download a story and store it.

        Reads the task parameters queued by the /fdown handler (record id,
        user email, output format, story URL, optional credentials), runs
        the FanFicFare adapter/writer to produce the output file, and saves
        the result as a sequence of DownloadData blobs attached to the
        DownloadMeta record.  On failure the exception text is recorded on
        the record instead of retrying.
        """
        logging.getLogger().setLevel(logging.DEBUG)
        fileId = self.request.get('id')
        # User object can't pass, just email address
        user = users.User(self.request.get('user'))
        format = self.request.get('format')
        url = self.request.get('url')
        login = self.request.get('login')
        password = self.request.get('password')
        is_adult = self.request.get('is_adult')

        # NOTE(review): this line was corrupted in the pasted source
        # ('...'******'...'); reconstructed from the surrounding code --
        # confirm the exact wording against upstream.
        logging.info("Downloading: " + url + " for user: " + user.email() +
                     " ID: " + fileId)

        adapter = None

        # use existing record if available.
        # fileId should have record from /fdown.
        download = getDownloadMeta(id=fileId,
                                   url=url,
                                   user=user,
                                   format=format,
                                   new=True)
        # Drop any stale data chunks left over from a previous attempt.
        for chunk in download.data_chunks:
            chunk.delete()
        download.put()

        logging.info('Creating adapter...')

        try:
            configuration = self.getUserConfig(user, url, format)
            adapter = adapters.getAdapter(configuration, url)
            adapter.setChaptersRange(download.ch_begin, download.ch_end)

            logging.info('Created an adapter: %s' % adapter)

            # Only override adapter credentials when the task supplied some.
            if login or password:
                adapter.username = login
                adapter.password = password
            adapter.is_adult = is_adult

            # adapter.getStory() is what does all the heavy lifting.
            # adapter.getStoryMetadataOnly() only fetches enough to
            # get metadata.  writer.writeStory() will call
            # adapter.getStory(), too.
            writer = writers.getWriter(format, configuration, adapter)
            download.name = writer.getOutputFileName()
            logging.debug('getOutputFileName:' + writer.getOutputFileName())
            download.title = adapter.getStory().getMetadata('title')
            download.author = adapter.getStory().getMetadata('author')
            download.url = adapter.getStory().getMetadata('storyUrl')
            download.put()

            allmeta = adapter.getStory().getAllMetadata(removeallentities=True,
                                                        doreplacements=False)

            outbuffer = StringIO()
            writer.writeStory(outbuffer)
            data = outbuffer.getvalue()
            outbuffer.close()
            del outbuffer
            # Free writer/adapter promptly: the output can be large and
            # App Engine instances are memory constrained.
            del writer
            del adapter

            # epubs are all already compressed.  Each chunk is
            # compressed individually to avoid having to hold the
            # whole in memory just for the compress/uncompress.
            if format != 'epub':

                def compress_chunk(chunk_bytes):
                    return zlib.compress(chunk_bytes)
            else:

                def compress_chunk(chunk_bytes):
                    return chunk_bytes

            # Delete existing chunks first.  BUG FIX: the loop variable was
            # previously named 'c', shadowing the chunk-compression function
            # also named 'c', so the storage loop below would have called a
            # datastore entity instead of the compressor.
            for chunk in download.data_chunks:
                chunk.delete()

            # Store the output in <=1MB slices (datastore blob size limit).
            index = 0
            while (len(data) > 0):
                DownloadData(download=download,
                             index=index,
                             blob=compress_chunk(data[:1000000])).put()
                index += 1
                data = data[1000000:]
            download.completed = True
            download.put()

            # Update the aggregate SavedMeta record / download counter
            # for this story URL.
            smetal = SavedMeta.all().filter('url =',
                                            allmeta['storyUrl']).fetch(1)
            if smetal and smetal[0]:
                smeta = smetal[0]
                smeta.count += 1
            else:
                smeta = SavedMeta()
                smeta.count = 1

            smeta.url = allmeta['storyUrl']
            smeta.title = allmeta['title']
            smeta.author = allmeta['author']
            smeta.meta = allmeta
            smeta.date = datetime.datetime.now()
            smeta.put()

            logging.info("Download finished OK")
            del data

        except Exception as e:
            # Record the failure on the download record so the UI can show
            # it; the task is considered handled either way.
            logging.exception(e)
            download.failure = unicode(e)
            download.put()
            return
# ---- Example #17 (scraper separator) ----
    def post(self):
        """Task-queue worker for /fdowntask (black-formatted variant).

        Reads the task parameters queued by /fdown (record id, user email,
        output format, story URL, optional credentials), runs the
        FanFicFare adapter/writer, and stores the output as DownloadData
        blobs on the DownloadMeta record.  Failures are recorded on the
        record rather than retried.
        """
        logging.getLogger().setLevel(logging.DEBUG)
        fileId = self.request.get("id")
        # User object can't pass, just email address
        user = users.User(self.request.get("user"))
        format = self.request.get("format")
        url = self.request.get("url")
        login = self.request.get("login")
        password = self.request.get("password")
        is_adult = self.request.get("is_adult")

        # NOTE(review): this line was corrupted in the pasted source
        # ("..."******"..."); reconstructed from the surrounding code --
        # confirm the exact wording against upstream.
        logging.info("Downloading: " + url + " for user: " + user.email() + " ID: " + fileId)

        adapter = None

        # use existing record if available.
        # fileId should have record from /fdown.
        download = getDownloadMeta(id=fileId, url=url, user=user, format=format, new=True)
        # Drop any stale data chunks left over from a previous attempt.
        for chunk in download.data_chunks:
            chunk.delete()
        download.put()

        logging.info("Creating adapter...")

        try:
            configuration = self.getUserConfig(user, url, format)
            adapter = adapters.getAdapter(configuration, url)
            adapter.setChaptersRange(download.ch_begin, download.ch_end)

            logging.info("Created an adapter: %s" % adapter)

            # Only override adapter credentials when the task supplied some.
            if login or password:
                adapter.username = login
                adapter.password = password
            adapter.is_adult = is_adult

            # adapter.getStory() is what does all the heavy lifting.
            # adapter.getStoryMetadataOnly() only fetches enough to
            # get metadata.  writer.writeStory() will call
            # adapter.getStory(), too.
            writer = writers.getWriter(format, configuration, adapter)
            download.name = writer.getOutputFileName()
            logging.debug("getOutputFileName:" + writer.getOutputFileName())
            download.title = adapter.getStory().getMetadata("title")
            download.author = adapter.getStory().getMetadata("author")
            download.url = adapter.getStory().getMetadata("storyUrl")
            download.put()

            allmeta = adapter.getStory().getAllMetadata(removeallentities=True, doreplacements=False)

            outbuffer = StringIO()
            writer.writeStory(outbuffer)
            data = outbuffer.getvalue()
            outbuffer.close()
            del outbuffer
            # Free writer/adapter promptly: the output can be large and
            # App Engine instances are memory constrained.
            del writer
            del adapter

            # epubs are all already compressed.  Each chunk is
            # compressed individually to avoid having to hold the
            # whole in memory just for the compress/uncompress.
            if format != "epub":

                def compress_chunk(chunk_bytes):
                    return zlib.compress(chunk_bytes)

            else:

                def compress_chunk(chunk_bytes):
                    return chunk_bytes

            # Delete existing chunks first.  BUG FIX: the loop variable was
            # previously named 'c', shadowing the chunk-compression function
            # also named 'c', so the storage loop below would have called a
            # datastore entity instead of the compressor.
            for chunk in download.data_chunks:
                chunk.delete()

            # Store the output in <=1MB slices (datastore blob size limit).
            index = 0
            while len(data) > 0:
                DownloadData(download=download, index=index, blob=compress_chunk(data[:1000000])).put()
                index += 1
                data = data[1000000:]
            download.completed = True
            download.put()

            # Update the aggregate SavedMeta record / download counter
            # for this story URL.
            smetal = SavedMeta.all().filter("url =", allmeta["storyUrl"]).fetch(1)
            if smetal and smetal[0]:
                smeta = smetal[0]
                smeta.count += 1
            else:
                smeta = SavedMeta()
                smeta.count = 1

            smeta.url = allmeta["storyUrl"]
            smeta.title = allmeta["title"]
            smeta.author = allmeta["author"]
            smeta.meta = allmeta
            smeta.date = datetime.datetime.now()
            smeta.put()

            logging.info("Download finished OK")
            del data

        except Exception as e:
            # Record the failure on the download record so the UI can show
            # it; the task is considered handled either way.
            logging.exception(e)
            download.failure = unicode(e)
            download.put()
            return
# ---- Example #18 (scraper separator) ----
def do_download(arg,
                options,
                passed_defaultsini,
                passed_personalini,
                warn=print,
                fail=print):
    """Download one story (or update an existing epub) for the CLI.

    arg is either a story URL or, with options.update, the path of an
    epub produced by a previous run.  options is the parsed CLI options
    object; passed_defaultsini / passed_personalini are ini contents
    handed through to get_configuration().  warn/fail are callables used
    to report recoverable / fatal problems (default: print).
    """

    # Attempt to update an existing epub.
    chaptercount = None
    output_filename = None

    if options.unnew:
        # remove mark_new_chapters marks
        reset_orig_chapters_epub(arg, arg)
        return

    if options.update:
        try:
            url, chaptercount = get_dcsource_chaptercount(arg)
            if not url:
                fail('No story URL found in epub to update.')
                return
            print('Updating %s, URL: %s' % (arg, url))
            output_filename = arg
        except Exception:
            # if there's an error reading the update file, maybe it's a URL?
            # we'll look for an existing outputfile down below.
            url = arg
    else:
        url = arg

    configuration = get_configuration(url, passed_defaultsini,
                                      passed_personalini, options,
                                      chaptercount, output_filename)

    try:
        # Allow chapter range with URL.
        # like test1.com?sid=5[4-6] or [4,6]
        # Overrides CLI options if present.
        url, ch_begin, ch_end = adapters.get_url_chapter_range(url)

        adapter = adapters.getAdapter(configuration, url)

        # url[begin-end] overrides CLI option if present.
        if ch_begin or ch_end:
            adapter.setChaptersRange(ch_begin, ch_end)
        else:
            adapter.setChaptersRange(options.begin, options.end)

        # check for updating from URL (vs from file)
        update_story = options.update
        if update_story and not chaptercount:
            try:
                writer = writers.getWriter('epub', configuration, adapter)
                output_filename = writer.getOutputFileName()
                noturl, chaptercount = get_dcsource_chaptercount(
                    output_filename)
                print('Updating %s, URL: %s' % (output_filename, url))
            except Exception as e:
                warn(
                    "Failed to read epub for update: (%s) Continuing with update=false"
                    % e)
                update_story = False

        # Check for include_images without no_image_processing. In absence of PIL, give warning.
        if adapter.getConfig('include_images') and not adapter.getConfig(
                'no_image_processing'):
            try:
                from calibre.utils.magick import Image
            except ImportError:
                try:
                    ## Pillow is a more current fork of PIL library
                    from PIL import Image
                except ImportError:
                    try:
                        import Image
                    except ImportError:
                        print(
                            "You have include_images enabled, but Python Image Library(PIL) isn't found.\nImages will be included full size in original format.\nContinue? (y/n)?"
                        )
                        if options.interactive:
                            if not sys.stdin.readline().strip().lower(
                            ).startswith('y'):
                                return
                        else:
                            # for non-interactive, default the response to yes and continue processing
                            print('y')

        # three tries, that's enough if both user/pass & is_adult needed,
        # or a couple tries of one or the other
        # NOTE(review): range(0, 2) only iterates twice, despite the
        # comment above saying three -- confirm intended retry count.
        for x in range(0, 2):
            try:
                adapter.getStoryMetadataOnly()
            except exceptions.FailedToLogin as f:
                if not options.interactive:
                    print(
                        'Login Failed on non-interactive process. Set username and password in personal.ini.'
                    )
                    return
                if f.passwdonly:
                    print('Story requires a password.')
                else:
                    print('Login Failed, Need Username/Password.')
                    # NOTE(review): the credential-prompt lines here were
                    # corrupted in the pasted source ('...'******'...');
                    # reconstructed to read credentials from stdin --
                    # confirm against upstream.
                    sys.stdout.write('Username: ')
                    adapter.username = sys.stdin.readline().strip()
                sys.stdout.write('Password: ')
                adapter.password = sys.stdin.readline().strip()
                print('Login: `%s`, Password: `%s`' %
                      (adapter.username, adapter.password))
            except exceptions.AdultCheckRequired:
                if options.interactive:
                    print(
                        'Please confirm you are an adult in your locale: (y/n)?'
                    )
                    if sys.stdin.readline().strip().lower().startswith('y'):
                        adapter.is_adult = True
                else:
                    print(
                        'Adult check required on non-interactive process. Set is_adult:true in personal.ini or pass -o "is_adult=true" to the command.'
                    )
                    return

        if update_story and not options.force:
            urlchaptercount = int(adapter.getStoryMetadataOnly().getMetadata(
                'numChapters').replace(',', ''))
            # returns int adjusted for start-end range.
            urlchaptercount = adapter.getStoryMetadataOnly().getChapterCount()

            if chaptercount == urlchaptercount and not options.metaonly and not options.updatealways:
                print('%s already contains %d chapters.' %
                      (output_filename, chaptercount))
            elif chaptercount > urlchaptercount:
                warn('%s contains %d chapters, more than source: %d.' %
                     (output_filename, chaptercount, urlchaptercount))
            elif chaptercount == 0:
                warn(
                    "%s doesn't contain any recognizable chapters, probably from a different source.  Not updating."
                    % output_filename)
            else:
                # update now handled by pre-populating the old
                # images and chapters in the adapter rather than
                # merging epubs.
                (url, chaptercount, adapter.oldchapters, adapter.oldimgs,
                 adapter.oldcover, adapter.calibrebookmark, adapter.logfile,
                 adapter.oldchaptersmap, adapter.oldchaptersdata) = (
                     get_update_data(output_filename))[0:9]

                print('Do update - epub(%d) vs url(%d)' %
                      (chaptercount, urlchaptercount))

                # NOTE(review): update_story is always True on this path,
                # so this branch can never fire -- confirm against upstream.
                if not update_story and chaptercount == urlchaptercount and adapter.getConfig(
                        'do_update_hook'):
                    adapter.hookForUpdates(chaptercount)

                if adapter.getConfig('pre_process_safepattern'):
                    metadata = adapter.story.get_filename_safe_metadata(
                        pattern=adapter.getConfig('pre_process_safepattern'))
                else:
                    metadata = adapter.story.getAllMetadata()
                call(string.Template(
                    adapter.getConfig('pre_process_cmd')).substitute(metadata),
                     shell=True)

                output_filename = write_story(configuration,
                                              adapter,
                                              'epub',
                                              nooutput=options.nooutput)

        else:
            if not options.metaonly and adapter.getConfig('pre_process_cmd'):
                if adapter.getConfig('pre_process_safepattern'):
                    metadata = adapter.story.get_filename_safe_metadata(
                        pattern=adapter.getConfig('pre_process_safepattern'))
                else:
                    metadata = adapter.story.getAllMetadata()
                call(string.Template(
                    adapter.getConfig('pre_process_cmd')).substitute(metadata),
                     shell=True)

            output_filename = write_story(configuration,
                                          adapter,
                                          options.format,
                                          metaonly=options.metaonly,
                                          nooutput=options.nooutput)

            if options.metaonly and not options.jsonmeta:
                metadata = adapter.getStoryMetadataOnly().getAllMetadata()
                metadata['output_filename'] = output_filename
                if not options.nometachapters:
                    metadata['zchapters'] = []
                    for i, chap in enumerate(adapter.get_chapters()):
                        metadata['zchapters'].append((i + 1, chap))
                else:
                    # If no chapters, also suppress output_css so
                    # metadata is shorter.
                    del metadata['output_css']
                pprint.pprint(metadata)

        # optional shell hook run after the story has been written.
        if not options.metaonly and adapter.getConfig('post_process_cmd'):
            if adapter.getConfig('post_process_safepattern'):
                metadata = adapter.story.get_filename_safe_metadata(
                    pattern=adapter.getConfig('post_process_safepattern'))
            else:
                metadata = adapter.story.getAllMetadata()
            metadata['output_filename'] = output_filename
            call(string.Template(
                adapter.getConfig('post_process_cmd')).substitute(metadata),
                 shell=True)

        # JSON metadata dump to stdout and/or <output>.json file.
        if options.jsonmeta or options.jsonmetafile:
            metadata = adapter.getStoryMetadataOnly().getAllMetadata()
            metadata['output_filename'] = output_filename
            if not options.nometachapters:
                metadata['zchapters'] = []
                for i, chap in enumerate(adapter.get_chapters()):
                    metadata['zchapters'].append((i + 1, chap))
            import json
            if options.jsonmeta:
                print(
                    json.dumps(metadata,
                               sort_keys=True,
                               indent=2,
                               separators=(',', ':')))
            if options.jsonmetafile:
                with open(output_filename + ".json", "w") as jsonfile:
                    json.dump(metadata,
                              jsonfile,
                              sort_keys=True,
                              indent=2,
                              separators=(',', ':'))
        if adapter.story.chapter_error_count > 0:
            warn(
                "===================\n!!!! %s chapters errored downloading %s !!!!\n==================="
                % (adapter.story.chapter_error_count, url))
        del adapter

    except exceptions.InvalidStoryURL as isu:
        fail(isu)
    except exceptions.StoryDoesNotExist as dne:
        fail(dne)
    except exceptions.UnknownSite as us:
        fail(us)
    except exceptions.AccessDenied as ad:
        fail(ad)
# ---- Example #19 (scraper separator) ----
def do_download_for_worker(book, options, merge, notification=lambda x, y: x):
    '''
    Child job, to download story when run as a worker job
    '''

    from calibre_plugins.fanficfare_plugin import FanFicFareBase
    fffbase = FanFicFareBase(options['plugin_path'])
    with fffbase:  # so the sys.path was modified while loading the
        # plug impl.
        from calibre_plugins.fanficfare_plugin.dialogs import NotGoingToDownload
        from calibre_plugins.fanficfare_plugin.prefs import (
            SAVE_YES, SAVE_YES_UNLESS_SITE, OVERWRITE, OVERWRITEALWAYS, UPDATE,
            UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, CALIBREONLYSAVECOL)
        from calibre_plugins.fanficfare_plugin.wordcount import get_word_count
        from fanficfare import adapters, writers
        from fanficfare.epubutils import get_update_data
        from fanficfare.six import text_type as unicode

        from calibre_plugins.fanficfare_plugin.fff_util import get_fff_config

        try:
            logger.info("\n\n" + ("-" * 80) + " " + book['url'])
            ## No need to download at all.  Can happen now due to
            ## collision moving into book for CALIBREONLY changing to
            ## ADDNEW when story URL not in library.
            if book['collision'] in (CALIBREONLY, CALIBREONLYSAVECOL):
                logger.info("Skipping CALIBREONLY 'update' down inside worker")
                return book

            book['comment'] = _('Download started...')

            configuration = get_fff_config(book['url'], options['fileform'],
                                           options['personal.ini'])

            if not options[
                    'updateepubcover'] and 'epub_for_update' in book and book[
                        'collision'] in (UPDATE, UPDATEALWAYS):
                configuration.set("overrides", "never_make_cover", "true")

            # images only for epub, html, even if the user mistakenly
            # turned it on else where.
            if options['fileform'] not in ("epub", "html"):
                configuration.set("overrides", "include_images", "false")

            adapter = adapters.getAdapter(configuration, book['url'])
            adapter.is_adult = book['is_adult']
            adapter.username = book['username']
            adapter.password = book['password']
            adapter.setChaptersRange(book['begin'], book['end'])

            ## each site download job starts with a new copy of the
            ## cookiejar and basic_cache from the FG process.  They
            ## are not shared between different sites' BG downloads
            if configuration.getConfig('use_browser_cache'):
                if 'browser_cache' in options:
                    configuration.set_browser_cache(options['browser_cache'])
                else:
                    options['browser_cache'] = configuration.get_browser_cache(
                    )
                    if 'browser_cachefile' in options:
                        options['browser_cache'].load_cache(
                            options['browser_cachefile'])
            if 'basic_cache' in options:
                configuration.set_basic_cache(options['basic_cache'])
            else:
                options['basic_cache'] = configuration.get_basic_cache()
                options['basic_cache'].load_cache(options['basic_cachefile'])
            if 'cookiejar' in options:
                configuration.set_cookiejar(options['cookiejar'])
            else:
                options['cookiejar'] = configuration.get_cookiejar()
                options['cookiejar'].load_cookiejar(options['cookiejarfile'])

            story = adapter.getStoryMetadataOnly()
            if not story.getMetadata("series") and 'calibre_series' in book:
                adapter.setSeries(book['calibre_series'][0],
                                  book['calibre_series'][1])

            # set PI version instead of default.
            if 'version' in options:
                story.setMetadata('version', options['version'])

            book['title'] = story.getMetadata("title", removeallentities=True)
            book['author_sort'] = book['author'] = story.getList(
                "author", removeallentities=True)
            book['publisher'] = story.getMetadata("publisher")
            book['url'] = story.getMetadata("storyUrl", removeallentities=True)
            book['tags'] = story.getSubjectTags(removeallentities=True)
            book['comments'] = story.get_sanitized_description()
            book['series'] = story.getMetadata("series",
                                               removeallentities=True)

            if story.getMetadataRaw('datePublished'):
                book['pubdate'] = story.getMetadataRaw(
                    'datePublished').replace(tzinfo=local_tz)
            if story.getMetadataRaw('dateUpdated'):
                book['updatedate'] = story.getMetadataRaw(
                    'dateUpdated').replace(tzinfo=local_tz)
            if story.getMetadataRaw('dateCreated'):
                book['timestamp'] = story.getMetadataRaw(
                    'dateCreated').replace(tzinfo=local_tz)
            else:
                book['timestamp'] = datetime.now().replace(
                    tzinfo=local_tz)  # need *something* there for calibre.

            writer = writers.getWriter(options['fileform'], configuration,
                                       adapter)
            outfile = book['outfile']

            ## checks were done earlier, it's new or not dup or newer--just write it.
            if book['collision'] in (ADDNEW, SKIP, OVERWRITE, OVERWRITEALWAYS) or \
                    ('epub_for_update' not in book and book['collision'] in (UPDATE, UPDATEALWAYS)):

                # preserve logfile even on overwrite.
                if 'epub_for_update' in book:
                    adapter.logfile = get_update_data(
                        book['epub_for_update'])[6]
                    # change the existing entries id to notid so
                    # write_epub writes a whole new set to indicate overwrite.
                    if adapter.logfile:
                        adapter.logfile = adapter.logfile.replace(
                            "span id", "span notid")

                if book['collision'] == OVERWRITE and 'fileupdated' in book:
                    lastupdated = story.getMetadataRaw('dateUpdated')
                    fileupdated = book['fileupdated']

                    # updated doesn't have time (or is midnight), use dates only.
                    # updated does have time, use full timestamps.
                    if (lastupdated.time() == time.min and fileupdated.date() > lastupdated.date()) or \
                            (lastupdated.time() != time.min and fileupdated > lastupdated):
                        raise NotGoingToDownload(
                            _("Not Overwriting, web site is not newer."),
                            'edit-undo.png',
                            showerror=False)

                logger.info("write to %s" % outfile)
                inject_cal_cols(book, story, configuration)
                writer.writeStory(outfilename=outfile,
                                  forceOverwrite=True,
                                  notification=notification)

                if adapter.story.chapter_error_count > 0:
                    book['comment'] = _('Download %(fileform)s completed, %(failed)s failed chapters, %(total)s total chapters.')%\
                        {'fileform':options['fileform'],
                         'failed':adapter.story.chapter_error_count,
                         'total':story.getMetadata("numChapters")}
                    book[
                        'chapter_error_count'] = adapter.story.chapter_error_count
                else:
                    book['comment'] = _('Download %(fileform)s completed, %(total)s chapters.')%\
                        {'fileform':options['fileform'],
                         'total':story.getMetadata("numChapters")}
                book['all_metadata'] = story.getAllMetadata(
                    removeallentities=True)
                if options['savemetacol'] != '':
                    book['savemetacol'] = story.dump_html_metadata()

            ## checks were done earlier, just update it.
            elif 'epub_for_update' in book and book['collision'] in (
                    UPDATE, UPDATEALWAYS):

                # update now handled by pre-populating the old images and
                # chapters in the adapter rather than merging epubs.
                #urlchaptercount = int(story.getMetadata('numChapters').replace(',',''))
                # returns int adjusted for start-end range.
                urlchaptercount = story.getChapterCount()
                # Prime the adapter with everything salvageable from the
                # existing epub so only new chapters are fetched.
                (url, chaptercount, adapter.oldchapters, adapter.oldimgs,
                 adapter.oldcover, adapter.calibrebookmark, adapter.logfile,
                 adapter.oldchaptersmap,
                 adapter.oldchaptersdata) = get_update_data(
                     book['epub_for_update'])[0:9]

                # dup handling from fff_plugin needed for anthology updates.
                if book['collision'] == UPDATE:
                    if chaptercount == urlchaptercount:
                        # Nothing new on the site.  For anthology merges the
                        # existing epub is reused as-is; otherwise skip.
                        if merge:
                            book['comment'] = _(
                                "Already contains %d chapters.  Reuse as is."
                            ) % chaptercount
                            book['all_metadata'] = story.getAllMetadata(
                                removeallentities=True)
                            if options['savemetacol'] != '':
                                book['savemetacol'] = story.dump_html_metadata(
                                )
                            book['outfile'] = book[
                                'epub_for_update']  # for anthology merge ops.
                            return book
                        else:  # not merge,
                            raise NotGoingToDownload(
                                _("Already contains %d chapters.") %
                                chaptercount,
                                'edit-undo.png',
                                showerror=False)
                    elif chaptercount > urlchaptercount:
                        # Local file has more chapters than the site --
                        # likely deleted chapters; require explicit Overwrite.
                        raise NotGoingToDownload(
                            _("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update."
                              ) % (chaptercount, urlchaptercount),
                            'dialog_error.png')
                    elif chaptercount == 0:
                        raise NotGoingToDownload(
                            _("FanFicFare doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update."
                              ), 'dialog_error.png')

                # Site-specific update hook may adjust the effective chapter
                # count; skipped when UPDATEALWAYS found no new chapters.
                if not (book['collision'] == UPDATEALWAYS and chaptercount == urlchaptercount) \
                        and adapter.getConfig("do_update_hook"):
                    chaptercount = adapter.hookForUpdates(chaptercount)

                logger.info("Do update - epub(%d) vs url(%d)" %
                            (chaptercount, urlchaptercount))
                logger.info("write to %s" % outfile)

                # Copy configured calibre custom columns into the story
                # metadata before writing the output file.
                inject_cal_cols(book, story, configuration)
                writer.writeStory(outfilename=outfile,
                                  forceOverwrite=True,
                                  notification=notification)

                # Surface any per-chapter download failures in the
                # user-visible comment and record the count for the caller.
                if adapter.story.chapter_error_count > 0:
                    book['comment'] = _('Update %(fileform)s completed, added %(added)s chapters, %(failed)s failed chapters, for %(total)s total.')%\
                        {'fileform':options['fileform'],
                         'failed':adapter.story.chapter_error_count,
                         'added':(urlchaptercount-chaptercount),
                         'total':urlchaptercount}
                    book[
                        'chapter_error_count'] = adapter.story.chapter_error_count
                else:
                    book['comment'] = _('Update %(fileform)s completed, added %(added)s chapters for %(total)s total.')%\
                        {'fileform':options['fileform'],'added':(urlchaptercount-chaptercount),'total':urlchaptercount}
                book['all_metadata'] = story.getAllMetadata(
                    removeallentities=True)
                if options['savemetacol'] != '':
                    book['savemetacol'] = story.dump_html_metadata()
            else:
                ## Shouldn't ever get here, but hey, it happened once
                ## before with prefs['collision']
                raise Exception(
                    "Impossible state reached -- Book: %s:\nOptions:%s:" %
                    (book, options))

            # Optionally compute the word count from the finished file and
            # store it in metadata: always when SAVE_YES, or when
            # SAVE_YES_UNLESS_SITE and the site didn't supply numWords.
            if options['do_wordcount'] == SAVE_YES or (
                    options['do_wordcount'] == SAVE_YES_UNLESS_SITE
                    and not story.getMetadataRaw('numWords')):
                try:
                    wordcount = get_word_count(outfile)
                    # logger.info("get_word_count:%s"%wordcount)
                    story.setMetadata('numWords', wordcount)
                    # Re-write the file so it carries the new word count,
                    # then refresh the metadata handed back to the caller.
                    writer.writeStory(outfilename=outfile, forceOverwrite=True)
                    book['all_metadata'] = story.getAllMetadata(
                        removeallentities=True)
                    if options['savemetacol'] != '':
                        book['savemetacol'] = story.dump_html_metadata()
                except Exception:
                    # Word count is best-effort: narrowed from a bare
                    # except (which also swallowed KeyboardInterrupt /
                    # SystemExit) and now logs the traceback so failures
                    # are diagnosable, while keeping the written book.
                    logger.error("WordCount failed", exc_info=True)

            # Run calibre's polish 'smarten punctuation' pass on epub
            # output; the polish API requires calibre >= 0.9.39.
            if options['smarten_punctuation'] and options['fileform'] == "epub" \
                    and calibre_version >= (0, 9, 39):
                # for smarten punc
                from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS
                from calibre.utils.logging import Log
                from collections import namedtuple

                # do smarten_punctuation from calibre's polish feature
                data = {'smarten_punctuation': True}
                opts = ALL_OPTS.copy()
                opts.update(data)
                # Build the Options namedtuple polish() expects from the
                # default option dict plus our one override.
                O = namedtuple('Options', ' '.join(six.iterkeys(ALL_OPTS)))
                opts = O(**opts)

                log = Log(level=Log.DEBUG)
                # Polish in place: source and destination are the same file.
                polish({outfile: outfile}, opts, log, logger.info)

        except NotGoingToDownload as d:
            book['good'] = False
            book['status'] = _('Bad')
            book['showerror'] = d.showerror
            book['comment'] = unicode(d)
            book['icon'] = d.icon

        except Exception as e:
            book['good'] = False
            book['status'] = _('Error')
            book['comment'] = unicode(e)
            book['icon'] = 'dialog_error.png'
            book['status'] = _('Error')
            logger.info("Exception: %s:%s" % (book, book['comment']),
                        exc_info=True)
    return book