Beispiel #1
0
def retrieve(key, path):
    """Recover the message hidden in the EPUB file at ``path``.

    The EPUB is expanded, wrapped in a ``Space`` and read bit by bit in the
    order produced by ``LineOrder(key, len(space))``.  The stream is laid
    out as:

    1. ``HEADER_BIT_SIZE`` bits giving the width (in bits) of the size field,
    2. the size field, giving the message length in bits,
    3. the message, one character per ``CHAR_BIT_SIZE`` bits.

    Args:
        key: Key handed to ``LineOrder`` to derive the read order.
        path: Path of the EPUB file to read from.

    Returns:
        The decoded message as a string.
    """
    epub = Epub()
    epub.expand(path)
    try:
        space = Space(epub)
        line_order = LineOrder(key, len(space))

        def read_int(bit_size):
            """Read ``bit_size`` bits from the space and return them as an int."""
            bits = ''.join(
                '1' if space[line_order.next()] else '0'
                for _ in range(bit_size)
            )
            return int(bits, 2)

        header_size = read_int(HEADER_BIT_SIZE)
        message_size = read_int(header_size)

        # One character is stored per CHAR_BIT_SIZE bits of the message.
        return ''.join(
            chr(read_int(CHAR_BIT_SIZE))
            for _ in range(0, message_size, CHAR_BIT_SIZE)
        )
    finally:
        # Always discard the expanded copy, even when decoding fails.
        epub.cleanup()
Beispiel #2
0
 def __init__(self, url = None, layout="TopShelf", accept_regexp=None, delete_regexp=None, skip=False):
   """Initialise the parser state and optionally parse ``url`` right away.

   Args:
     url: When truthy, it is passed to ``parse_url`` at the end of
       construction.
     layout: Stored as ``self.layout``; passing ``None`` switches the
       instance to raw mode (``self.raw`` is True).
     accept_regexp: Optional pattern string, compiled into
       ``self.accept_regexp``.
     delete_regexp: Optional pattern string, compiled into
       ``self.delete_regexp``.
     skip: When true, ``parse_url`` is called with ``output=False``.
   """
   Epub.__init__(self)
   self.downloaded_files = {}
   self.current_nav = self.navigation
   self.info_title  = ""
   self.info_url    = ""
   self.info_author = []
   self.info_tags   = []
   self.info_date   = datetime.datetime.utcnow()
   # Identity test: None is a singleton and "==" can be overridden.
   self.raw         = layout is None
   self.layout      = layout
   self.replace     = []
   self.replace_show= False
   self.error       = False
   self.downloadOnce= False
   self.BeautifulParser = BeautifulSoup.BeautifulSoup
   # Empty or missing patterns mean "no filter" and are stored as None.
   self.accept_regexp = re.compile(accept_regexp) if accept_regexp else None
   self.delete_regexp = re.compile(delete_regexp) if delete_regexp else None
   self.recursion_limit = None
   self.recursion_index = 0
   if url:
     self.parse_url(url, output=(not skip))
Beispiel #3
0
def start(urls, output_dir=None, cover_path=None, out_format='epub'):
    """
    Generate one output file per book for every supported URL.

    Each URL is matched against the known site classes; the first class
    whose ``check_url`` accepts it handles the URL.  URLs that no class
    accepts are reported on stdout and skipped.

    Args:
        urls: An iterable of the URL strings entered by the user
        output_dir: A string with the path of the output directory
        cover_path: A string with the path of the cover image
        out_format: A string naming the output file format
    """
    for url in urls:
        handler = next(
            (cls for cls in (OldLinovel, Dxs, Wenku) if cls.check_url(url)),
            None,
        )
        if handler is None:
            print('URL "{}" is invalid'.format(url))
            continue

        novel = handler(url, _SINGLE_THREAD)
        novel.extract_novel_information()
        for book in novel.get_novel_information():
            writer = Epub(output_dir=output_dir,
                          cover_path=cover_path,
                          out_format=out_format,
                          **book)
            writer.generate_file()
Beispiel #4
0
def start(url, output_dir=None, cover_path=None):
    """Generate an EPUB for each selected volume of the series at ``url``.

    Args:
        url: URL of the series, passed to ``Series``.
        output_dir: Output directory forwarded to ``Epub`` (previously
            ignored: ``None`` was always passed instead).
        cover_path: Cover image path forwarded to ``Epub`` (previously
            ignored as well).
    """
    s = Series(url=url)
    s.getInfo()
    volumes = s.selectVolumes()
    volumeInfos = s.info(volumes)

    for volumeInfo in volumeInfos:
        # Forward the caller's settings instead of hard-coded None values.
        epub = Epub(output_dir=output_dir, cover_path=cover_path, **volumeInfo)
        epub.generate_file()
        # Single-argument call form prints the same text on Python 2 and 3.
        print('Generating Epub')
Beispiel #5
0
    def save(self, *args, **kwargs):
        """Save the book, first filling in a missing cover from its EPUB.

        When no cover image is set and the book file name ends with
        ``.epub``, the cover path is looked up through ``Epub`` and, if the
        file exists on disk, stored in ``cover_img`` before the regular
        save runs.
        """
        if not self.cover_img and self.book_file.name.endswith('.epub'):
            # get the cover path from the epub file
            epub_file = Epub(self.book_file)
            try:
                cover_path = epub_file.get_cover_image_path()
                if cover_path is not None and os.path.exists(cover_path):
                    # Images are binary data: open in 'rb' and make sure the
                    # handle is closed once the cover has been stored.
                    with open(cover_path, 'rb') as cover_handle:
                        # NOTE(review): if cover_img is a Django FieldFile,
                        # save() defaults to save=True and also saves the
                        # instance -- confirm that is intended here.
                        self.cover_img.save(os.path.basename(cover_path),
                                            File(cover_handle))
            finally:
                # Release the epub even if cover extraction fails.
                epub_file.close()

        super(Book, self).save(*args, **kwargs)
Beispiel #6
0
def compile_epub(title,
                 author,
                 cover_type,
                 cover_bytes,
                 chapters,
                 images=None,
                 path=None,
                 metadata=None):
    """Compile an ePub from the given arguments.

    Args:
        title: Book title; also used to build the default file name.
        author: Book author; also used to build the default file name.
        cover_type: Cover type, passed to ``Epub`` and ``add_cover``.
        cover_bytes: Cover content, passed to ``Epub`` and ``add_cover``.
        chapters: Iterable of ``(title, filename, chapter_text)`` triples.
        images: Optional iterable of ``(title, filename, bytes)`` triples.
            Defaults to no images.
        path: Where the ePub should be saved.  When empty or ``None`` the
            file is saved as ``"<title> - <author>.epub"`` in the current
            directory.
        metadata: Optional metadata mapping passed to ``Epub``.  Defaults
            to an empty dict.
    """
    # Build fresh containers per call; mutable defaults in the signature
    # would be shared between calls (and with whatever Epub does to them).
    if images is None:
        images = []
    if metadata is None:
        metadata = {}
    if not path:
        path = title + " - " + author + ".epub"

    print("Compiling epub...")
    with Epub(title, author, path, cover_type, cover_bytes,
              metadata=metadata) as epub:

        epub.add_cover(cover_type, cover_bytes)
        for (local_name, filename, text) in chapters:
            epub.add_chapter(local_name, filename, text)

        for (local_name, filename, image_bytes) in images:
            epub.add_image(local_name, filename, image_bytes)

    print("Done!")
    print("Saved ePub to {!r}".format(path))
Beispiel #7
0
def grab_volume(url, output_dir, cover_path, out_format):
    """
    Fetch one volume and write it out as a file.

    Args:
        url: A string with the URL entered by the user
        output_dir: A string with the path of the output directory
        cover_path: A string with the path of the cover image
        out_format: A string naming the output format

    Any exception is reported on stdout together with the URL and then
    re-raised.
    """
    try:
        print('Getting:' + url)
        novel = Novel(url=url, single_thread=_SINGLE_THREAD)
        novel.get_novel_information()
        book_info = novel.novel_information()
        writer = Epub(output_dir=output_dir,
                      cover_path=cover_path,
                      out_format=out_format,
                      **book_info)
        writer.generate_file()

    except Exception as err:
        print('错误', str(err) + '\nAt:' + url)
        raise err
Beispiel #8
0
def grab_volume(url, output_dir, cover_path):
    """
    Fetch one volume and write it out as an EPUB.

    Args:
        url: A string with the URL entered by the user
        output_dir: A string with the path of the output directory
        cover_path: A string with the path of the cover image

    On failure the error is signalled through ``SENDER`` when Qt is
    available, the URL is printed, and the exception is re-raised.
    """
    try:
        print_info('Getting:' + url)
        novel = Novel(url=url, single_thread=SINGLE_THREAD)
        novel.get_novel_information()
        book_info = novel.novel_information()
        writer = Epub(output_dir=output_dir, cover_path=cover_path, **book_info)
        writer.generate_epub()

    except Exception as err:
        if HAS_QT:
            detail = str(err) + '\nat:' + url
            SENDER.sigWarningMessage.emit('错误', detail)
            SENDER.sigButton.emit()
        print(url)
        raise err
Beispiel #9
0
    def clean_epub_file(self):
        """Perform basic validation of the epub_file by making sure:
        - no other existing models have the same sha256 hash.
        - it is parseable by `Epub`.

        On success the original path, info dict, cover path and sha256 sum
        are stored in ``self.cleaned_data`` and the uploaded file is
        returned.  Any parsing failure is re-raised as a
        ``forms.ValidationError``.

        TODO: This method is called twice during the wizard (at step 0, and
        at done()), by Django design. Still, we should look for alternatives
        in order to make sure epub validation only happens once.
        https://code.djangoproject.com/ticket/10810
        """
        data = self.cleaned_data["epub_file"]

        # Validate sha256 hash.
        sha256sum = models.sha256_sum(data)
        if models.Book.objects.filter(file_sha256sum=sha256sum).exists():
            raise forms.ValidationError("The file is already on the database")

        # Validate parseability.
        epub = None
        try:
            # Fetch information from the epub, and set it as attributes.
            epub = Epub(data)
            # The tags are part of the returned triple but are not used here.
            info_dict, cover_path, _tags = epub.as_model_dict()

            # TODO: pass this info via a cleaner way.
            self.cleaned_data["original_path"] = data.name
            self.cleaned_data["info_dict"] = info_dict
            self.cleaned_data["cover_path"] = cover_path
            self.cleaned_data["file_sha256sum"] = sha256sum
        except Exception as e:
            raise forms.ValidationError(str(e))
        finally:
            # Try to remove the temp extracted epub folder.  Nothing to do
            # when Epub() itself failed and no instance was created.
            if epub is not None:
                try:
                    epub.close()
                except Exception:
                    # Best-effort cleanup: a failure here must not mask the
                    # validation result, but SystemExit/KeyboardInterrupt
                    # are no longer swallowed.
                    pass

        return data
Beispiel #10
0
def start(urls, output_dir=None, cover_path=None, out_format='epub'):
    """
    Generate one output file per book for every supported URL.

    A URL is handled by the first site class whose ``check_url`` accepts
    it; URLs that no class accepts are reported on stdout and skipped.

    Args:
        urls: An iterable of the URL strings entered by the user
        output_dir: A string with the path of the output directory
        cover_path: A string with the path of the cover image
        out_format: A string naming the output file format
    """
    for url in urls:
        handler = next(
            (cls for cls in (OldLinovel,) if cls.check_url(url)),
            None,
        )
        if handler is None:
            print('URL "{}" is invalid'.format(url))
            continue

        novel = handler(url, _SINGLE_THREAD)
        novel.extract_novel_information()
        for book in novel.get_novel_information():
            writer = Epub(output_dir=output_dir,
                          cover_path=cover_path,
                          out_format=out_format,
                          **book)
            writer.generate_file()
Beispiel #11
0
    def clean_epub_file(self):
        """Perform basic validation of the epub_file by making sure:
        - no other existing models have the same sha256 hash.
        - it is parseable by `Epub`.

        On success the original path, info dict, cover path and sha256 sum
        are stored in ``self.cleaned_data`` and the uploaded file is
        returned.  Any parsing failure is re-raised as a
        ``forms.ValidationError``.

        TODO: This method is called twice during the wizard (at step 0, and
        at done()), by Django design. Still, we should look for alternatives
        in order to make sure epub validation only happens once.
        https://code.djangoproject.com/ticket/10810
        """
        data = self.cleaned_data['epub_file']

        # Validate sha256 hash.
        sha256sum = models.sha256_sum(data)
        if models.Book.objects.filter(file_sha256sum=sha256sum).exists():
            raise forms.ValidationError('The file is already on the database')

        # Validate parseability.
        epub = None
        try:
            # Fetch information from the epub, and set it as attributes.
            epub = Epub(data)
            # The tags are part of the returned triple but are not used here.
            info_dict, cover_path, _tags = epub.as_model_dict()

            # TODO: pass this info via a cleaner way.
            self.cleaned_data['original_path'] = data.name
            self.cleaned_data['info_dict'] = info_dict
            self.cleaned_data['cover_path'] = cover_path
            self.cleaned_data['file_sha256sum'] = sha256sum
        except Exception as e:
            raise forms.ValidationError(str(e))
        finally:
            # Try to remove the temp extracted epub folder.  Nothing to do
            # when Epub() itself failed and no instance was created.
            if epub is not None:
                try:
                    epub.close()
                except Exception:
                    # Best-effort cleanup: a failure here must not mask the
                    # validation result, but SystemExit/KeyboardInterrupt
                    # are no longer swallowed.
                    pass

        return data
Beispiel #12
0
def store(key, message, in_path, out_path):
    """Hide ``message`` inside a copy of an EPUB file.

    The bit stream written into the ``Space`` of the expanded EPUB is:

    1. ``HEADER_BIT_SIZE`` bits holding the width of the size field,
    2. the size field: the message length in bits, in binary,
    3. the message, ``CHAR_BIT_SIZE`` bits per character.

    Bits are placed at the positions produced by
    ``LineOrder(key, len(space))`` after ``add_noise`` has been applied.

    Args:
        key: Key handed to ``LineOrder`` to derive the write order.
        message: Text to embed.
        in_path: Path of the EPUB to read.
        out_path: Path the modified EPUB is written to.

    If the EPUB does not offer enough space, an error is printed and
    nothing is written to ``out_path``.
    """
    epub = Epub()
    epub.expand(in_path)
    space = Space(epub)

    binary_message = ''.join([format(ord(x), '0'+str(CHAR_BIT_SIZE)+'b') for x in message])
    message_size = format(len(binary_message), 'b')
    header_size = format(len(message_size), '0'+str(HEADER_BIT_SIZE)+'b')

    bits_to_write = header_size + message_size + binary_message

    if len(space) < len(bits_to_write):
        # Single-argument call form prints the same text on Python 2 and 3.
        print('Error, not enough space on the epub file')
        # Stop here: carrying on would try to embed more bits than the
        # space can hold.
        # NOTE(review): assumes Epub.cleanup() discards the expanded
        # working copy -- confirm against the Epub class.
        epub.cleanup()
        return

    line_order = LineOrder(key, len(space))

    add_noise(space)

    for bit in bits_to_write:
        space[line_order.next()] = (bit == '1')

    space.commit()
    epub.contract(out_path)
Beispiel #13
0
def grab_volume(url, output_dir, cover_path):
    """
    Fetch one volume and write it out as an EPUB.

    Args:
        url: A string with the URL entered by the user
        output_dir: A string with the path of the output directory
        cover_path: A string with the path of the cover image

    On failure the error is signalled through ``SENDER`` when Qt is
    available, the URL is printed, and the exception is re-raised.
    """
    try:
        print_info('Getting:' + url)
        novel = Novel(url=url, single_thread=SINGLE_THREAD)
        novel.get_novel_information()
        book_info = novel.novel_information()
        writer = Epub(output_dir=output_dir, cover_path=cover_path, **book_info)
        writer.generate_epub()

    except Exception as err:
        if HAS_QT:
            detail = str(err) + '\nat:' + url
            SENDER.sigWarningMessage.emit('错误', detail)
            SENDER.sigButton.emit()
        print(url)
        raise err
Beispiel #14
0
def write_to_epub(novelids, login, session):
    """Build and pack one EPUB per novel id.

    For every id the summary, intro, contents and picture are fetched with
    the module's ``get_*`` helpers and written through an ``Epub``
    instance.  Only content entries whose first element is truthy have
    their text fetched and written.
    """
    for novel_id in novelids:
        book = Epub()
        book.summary = get_summary(novel_id, session)
        book.intro = get_intro(novel_id, session)
        book.author, book.name, contents = get_contents(novel_id, login, session)
        book.init()
        picture = get_pic(novel_id, book.filename, session)
        book.write_pic(picture)
        book.write_coverandintro()
        book.write_chapters(contents)
        for entry in contents:
            if not entry[0]:
                continue
            body = get_all_text(entry[2], session)
            heading = entry[1]
            print(heading[0], " ", heading[1])
            book.write_text(heading, body)
        book.packet()
    #----------------------------------------------------------------------
    def __init__(self,epub):
        """Remember the target epub and create the page downloader."""
        self.epub = epub
        downloader = WebPageDownloader.WebPageDownloader()
        self.webPageDownloader = downloader
        
    #----------------------------------------------------------------------
    def run(self,feed):
        """Download the items of ``feed`` and add the HTML pages to the epub.

        Does nothing when ``feed`` or ``feed.feed`` is None.  Files are
        downloaded into the ``temp`` folder under the epub's root
        directory; every download whose name ends in ``.html`` is added to
        the epub and registered in its table of contents.
        """
        if feed is None or feed.feed is None:
            return

        temp_dir = os.path.join(self.epub.rootDir,'temp')
        for entry in feed['items']:
            downloads = self.webPageDownloader.download(entry,temp_dir)
            for file_name, saved_path in downloads:
                if not file_name.endswith('.html'):
                    continue
                node = self.epub.addHtml(saved_path,file_name+'.html',None)
                self.epub.addTocMapNode(node.destPath, file_name)
                #elif downloadedFileName.endswith('.html'):
                    
                
# Manual test driver: builds an epub rooted at a hard-coded local folder and
# feeds it with the feeds listed in the 'feed.fl' file next to this script.
if __name__ == "__main__":
    # NOTE(review): machine-specific path -- adjust before running elsewhere.
    epub = Epub(r'G:\zll\python\gae\test1')
    epub.addCreator('zll')
    epub.setTitle("zll's Test")
    feedVistor = FeedVistor(epub)
    # The feed list is looked up in the same directory as this file.
    feedListFilePath = os.path.join(os.path.dirname(__file__),'feed.fl')
    feedMgr = FeedManager(feedListFilePath)
    feedMgr.run(feedVistor)
    #epub.create(r'D:\zll\gae\test1\test1.epub')