コード例 #1
0
ファイル: tests.py プロジェクト: mcepl/epub-meta
 def test_cover_image(self):
     # Each row pairs a sample book with the cover extension the parser
     # should report; None marks a book that is expected to have no cover.
     expectations = (
         ('backbone-fundamentals.epub', '.jpg'),
         ('georgia-cfi-20120521.epub', '.png'),
         ('georgia-pls-ssml-20120322.epub', '.png'),
         ('mathjax_tests.epub', None),
         ('moby-dick.epub', '.jpg'),
         ('progit.epub', '.jpg'),
         ('high-performance-computing-5.2.epub', '.png'),
     )
     for filename, extension in expectations:
         book = get_epub_metadata(os.path.join(dir_path, filename))
         self.assertEqual(book.cover_image_extension, extension)
         if extension is None:
             # No cover: the content must be absent as well.
             self.assertEqual(book.cover_image_content, None)
         else:
             self.assertIsNotNone(book.cover_image_content)
コード例 #2
0
ファイル: tests.py プロジェクト: mcepl/epub-meta
 def test_identifiers(self):
     # Expected identifier list for every sample book, checked in order.
     expectations = (
         ('backbone-fundamentals.epub',
          ['urn:uuid:d1d91a1f-031f-49c0-83ff-2f556aa0c4d5']),
         ('georgia-cfi-20120521.epub',
          ['code.google.com.epub-samples.georgia-cfi']),
         ('georgia-pls-ssml-20120322.epub',
          ['code.google.com.epub-samples.georgia-pls-ssml']),
         ('mathjax_tests.epub',
          ['http://boolesrings.org/krautzberger']),
         ('moby-dick.epub',
          ['code.google.com.epub-samples.moby-dick-basic']),
         ('progit.epub',
          ['bf50c6e1-eb0a-4a1c-a2cd-ea8809ae086a', '9781430218333']),
         ('high-performance-computing-5.2.epub',
          ['_id253509']),
     )
     for filename, identifiers in expectations:
         book = get_epub_metadata(os.path.join(dir_path, filename))
         self.assertEqual(book.identifiers, identifiers)
コード例 #3
0
ファイル: tests.py プロジェクト: mcepl/epub-meta
 def test_run(self):
     # Smoke test: parse each sample (table of contents included, cover
     # image skipped) and print the resulting metadata as indented JSON.
     sample_names = (
         'backbone-fundamentals.epub',
         'georgia-cfi-20120521.epub',
         'georgia-pls-ssml-20120322.epub',
         'mathjax_tests.epub',
         'moby-dick.epub',
         'progit.epub',
     )
     for name in sample_names:
         epub_path = os.path.join(dir_path, name)
         metadata = get_epub_metadata(
             epub_path, read_cover_image=False, read_toc=True)
         rendered = json.dumps(metadata, indent=4)
         print(rendered)
コード例 #4
0
ファイル: tests.py プロジェクト: mcepl/epub-meta
 def test_encoding(self):
     # The title must be the interpreter's text type (unicode on Python 2,
     # str otherwise) and the first two toc entries must be plain dicts.
     book = get_epub_metadata(os.path.join(dir_path, 'progit.epub'))
     # `unicode` is only evaluated when running under Python 2.
     text_type = unicode if IS_PY2 else str
     self.assertEqual(type(book.title), text_type)
     for position in (0, 1):
         self.assertEqual(type(book.toc[position]), dict)
コード例 #5
0
def analyze_epub(path):
    """Summarise an EPUB file as an ``(author, title, year, path, 'epub')`` tuple.

    The author is the first entry of the metadata's ``authors`` list and the
    year is derived from ``publication_date`` via
    ``get_year_from_date_string``.

    Returns:
        The 5-tuple described above, or ``None`` when the metadata cannot be
        read or a required field is missing.
    """
    try:
        epub_metadata = get_epub_metadata(path)
        date = epub_metadata.get('publication_date')
        year = get_year_from_date_string(date)
        author = epub_metadata['authors'][0]
        title = epub_metadata['title']
    except Exception:
        # Best effort: any parsing/lookup failure means "not analyzable".
        # Catching Exception (rather than a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
    return (author, title, year, path, 'epub')
コード例 #6
0
def get_epub_info(file):
    """Return the metadata of an uploaded EPUB as a plain ``dict``.

    ``file`` must provide a ``save(path)`` method; it is written to a
    temporary path, parsed with ``epub_meta.get_epub_metadata`` (table of
    contents included, cover image skipped) and the temporary file is
    removed again.

    Raises:
        Whatever ``file.save`` or ``epub_meta.get_epub_metadata`` raise; the
        temporary file is removed even when parsing fails.
    """
    # NOTE(review): fixed name in the working directory -- concurrent calls
    # would overwrite each other's upload; confirm single-threaded use.
    temp_path = "temp_epub"

    file.save(temp_path)
    try:
        output = epub_meta.get_epub_metadata(
            temp_path, read_cover_image=False, read_toc=True)
        return dict(output)
    finally:
        # Cleanup server: always drop the temporary copy, even on errors.
        os.remove(temp_path)
コード例 #7
0
def get_epub_meta_data(path):
    """Extract display-ready metadata from the EPUB file at ``path``.

    Returns:
        A dict with the keys ``authors`` (names joined with ``'; '`` or
        ``None``), ``title``, ``file_type`` (always ``'epub'``),
        ``publish_date`` (result of ``format_publish_date`` or ``None``),
        ``language`` and ``description`` (markup-free, whitespace-normalised
        text or ``None``).
    """
    meta = epub_meta.get_epub_metadata(path)

    # Format authors
    author_list = meta.get('authors')
    authors = '; '.join(author_list) if author_list is not None else None

    # Format Publish Date
    raw_date = meta.get('publication_date')
    if raw_date is None:
        publish_date = None
    else:
        try:
            # Only the leading 'YYYY-MM-DD' part of the timestamp is used.
            publish_date = format_publish_date(raw_date[:10], '%Y-%m-%d')
        except (ValueError, AttributeError, TypeError):
            # A tuple is required here: `except A or B or C` evaluates the
            # `or` expression first and therefore only ever catches A.
            publish_date = None

    # Format description
    raw_description = meta.get('description')
    if raw_description:
        # Drop anything that looks like a tag, then squeeze every run of
        # spaces, tabs and line breaks into a single space.
        description = re.sub(r'<.*?>', '', raw_description)
        description = re.sub(r'[ \n\t\r]+', ' ', description)
    else:
        description = None

    # Format language
    if 'language' in meta:
        language = meta['language']
    else:
        # No declared language: guess from the description, falling back to
        # the title when there is no usable description.
        language = detect_language(description or meta['title'])

    return {'authors': authors, 'title': meta['title'], 'file_type': 'epub',
            'publish_date': publish_date, 'language': language,
            'description': description}
コード例 #8
0
ファイル: tests.py プロジェクト: mcepl/epub-meta
 def test_publication_date(self):
     # Only progit.epub is expected to carry a publication date.
     expectations = (
         ('backbone-fundamentals.epub', None),
         ('georgia-cfi-20120521.epub', None),
         ('georgia-pls-ssml-20120322.epub', None),
         ('mathjax_tests.epub', None),
         ('moby-dick.epub', None),
         ('progit.epub', '2009-08-19T00:00:00+00:00'),
         ('high-performance-computing-5.2.epub', None),
     )
     for filename, publication_date in expectations:
         book = get_epub_metadata(os.path.join(dir_path, filename))
         self.assertEqual(book.publication_date, publication_date)
コード例 #9
0
ファイル: tests.py プロジェクト: mcepl/epub-meta
 def test_publisher(self):
     # Expected publisher per sample book; None where none is declared.
     expectations = (
         ('backbone-fundamentals.epub', None),
         ('georgia-cfi-20120521.epub', None),
         ('georgia-pls-ssml-20120322.epub', None),
         ('mathjax_tests.epub', None),
         ('moby-dick.epub', 'Harper & Brothers, Publishers'),
         ('progit.epub', 'Springer'),
         ('high-performance-computing-5.2.epub', None),
     )
     for filename, publisher in expectations:
         book = get_epub_metadata(os.path.join(dir_path, filename))
         self.assertEqual(book.publisher, publisher)
コード例 #10
0
ファイル: tests.py プロジェクト: mcepl/epub-meta
 def test_authors(self):
     # Expected author list per sample book.
     expectations = (
         ('backbone-fundamentals.epub', ['Addy Osmani']),
         ('georgia-cfi-20120521.epub', ['Various']),
         ('georgia-pls-ssml-20120322.epub', ['Various']),
         ('mathjax_tests.epub', ['Peter Krautzberger']),
         ('moby-dick.epub', ['Herman Melville']),
         ('progit.epub', ['Scott Chacon']),
         ('high-performance-computing-5.2.epub',
          ['Charles Severance', 'Kevin Dowd']),
     )
     for filename, authors in expectations:
         book = get_epub_metadata(os.path.join(dir_path, filename))
         self.assertEqual(book.authors, authors)
コード例 #11
0
ファイル: tests.py プロジェクト: mcepl/epub-meta
 def test_language(self):
     # Expected language tag per sample book.
     expectations = (
         ('backbone-fundamentals.epub', 'en-US'),
         ('georgia-cfi-20120521.epub', 'en-US'),
         ('georgia-pls-ssml-20120322.epub', 'en-US'),
         ('mathjax_tests.epub', 'en'),
         ('moby-dick.epub', 'en-US'),
         ('progit.epub', 'en'),
         ('high-performance-computing-5.2.epub', 'en'),
     )
     for filename, language in expectations:
         book = get_epub_metadata(os.path.join(dir_path, filename))
         self.assertEqual(book.language, language)
コード例 #12
0
ファイル: tests.py プロジェクト: mcepl/epub-meta
 def test_subject(self):
     # Expected subject list per sample book; most declare none.
     expectations = (
         ('backbone-fundamentals.epub', []),
         ('georgia-cfi-20120521.epub', []),
         ('georgia-pls-ssml-20120322.epub', []),
         ('mathjax_tests.epub', []),
         ('moby-dick.epub', []),
         ('progit.epub', ['Software Development']),
         ('high-performance-computing-5.2.epub', []),
     )
     for filename, subject in expectations:
         book = get_epub_metadata(os.path.join(dir_path, filename))
         self.assertEqual(book.subject, subject)
コード例 #13
0
ファイル: tests.py プロジェクト: mcepl/epub-meta
 def test_file_size(self):
     # Expected size in bytes of every sample file.
     expectations = (
         ('backbone-fundamentals.epub', 325803),
         ('georgia-cfi-20120521.epub', 1095025),
         ('georgia-pls-ssml-20120322.epub', 546553),
         ('mathjax_tests.epub', 809373),
         ('moby-dick.epub', 1668149),
         ('progit.epub', 4346158),
         ('high-performance-computing-5.2.epub', 3045262),
     )
     for filename, size_in_bytes in expectations:
         book = get_epub_metadata(os.path.join(dir_path, filename))
         self.assertEqual(book.file_size_in_bytes, size_in_bytes)
コード例 #14
0
ファイル: tests.py プロジェクト: mcepl/epub-meta
 def test_title(self):
     # Expected title per sample book.
     expectations = (
         ('backbone-fundamentals.epub',
          'Developing Backbone.js Applications'),
         ('georgia-cfi-20120521.epub', 'Georgia'),
         ('georgia-pls-ssml-20120322.epub', 'Georgia'),
         ('mathjax_tests.epub',
          'Gathering a few MathML torture tests -- no MathJax'),
         ('moby-dick.epub', 'Moby-Dick'),
         ('progit.epub', 'Pro Git'),
         ('high-performance-computing-5.2.epub',
          'High Performance Computing'),
     )
     for filename, title in expectations:
         book = get_epub_metadata(os.path.join(dir_path, filename))
         self.assertEqual(book.title, title)
コード例 #15
0
ファイル: extractors.py プロジェクト: sleepy771/filecrawl
def epub_metadata_extractor(filepath):
    """Return the EPUB metadata of ``filepath`` as a plain dict.

    The cover image and table of contents are not read. A publication date
    that ``is_invalid_date`` rejects is replaced by ``None``.
    """
    parsed = get_epub_metadata(
        filepath, read_cover_image=False, read_toc=False)
    result = dict(parsed)
    date_value = result['publication_date']
    if is_invalid_date(date_value):
        result['publication_date'] = None
    return result
コード例 #16
0
ファイル: tests.py プロジェクト: mcepl/epub-meta
 def test_inexistent_file(self):
     # Opening a path that does not exist must raise EPubException.
     # assertRaises fails with "EPubException not raised" instead of the
     # uninformative "1 != 0" the hand-rolled try/except produced.
     with self.assertRaises(EPubException):
         get_epub_metadata(os.path.join(dir_path, 'inexistent.epub'))
コード例 #17
0
ファイル: tests.py プロジェクト: mcepl/epub-meta
 def test_relative_path(self):
     # This book's cover has a relative path (sits at zip file root), so
     # reading it exercises relative-path resolution of the cover image.
     book_path = os.path.join(dir_path, 'moby-dick.epub')
     metadata = get_epub_metadata(book_path, read_cover_image=True)
     has_cover_key = 'cover_image_content' in metadata
     self.assertTrue(has_cover_key)
コード例 #18
0
ファイル: tests.py プロジェクト: mcepl/epub-meta
    def test_toc(self):
        # Checks the parsed table of contents of every sample book: the full
        # entry list for three of them, only the entry count for the rest.

        def build_toc(rows):
            # Expand (src, title, level) rows into toc dicts numbered from 0.
            return [
                {'index': position, 'src': src, 'title': title,
                 'level': level}
                for position, (src, title, level) in enumerate(rows)
            ]

        def load(filename):
            # Parse one sample book from the test data directory.
            return get_epub_metadata(os.path.join(dir_path, filename))

        book = load('backbone-fundamentals.epub')
        self.assertEqual(book.toc, build_toc([
            ('title_page.xhtml', 'Title Page', 0),
            ('ch2.xhtml', 'MongoDB Ruby Driver', 0),
            ('ch3.xhtml', 'Practical', 0),
            ('ch4.xhtml',
             'Unit Testing Backbone Applications With Jasmine', 0),
            ('ch5.xhtml',
             'Unit Testing Backbone Applications With QUnit And SinonJS', 0),
            ('ch6.xhtml', 'QUnit', 0),
            ('ch7.xhtml', 'SinonJS', 0),
            ('ch8.xhtml', 'Practical', 0),
        ]))

        # For these books only the number of toc entries is pinned.
        entry_counts = (
            ('georgia-cfi-20120521.epub', 10),
            ('georgia-pls-ssml-20120322.epub', 17),
            ('mathjax_tests.epub', 6),
            ('moby-dick.epub', 143),
        )
        for filename, count in entry_counts:
            book = load(filename)
            self.assertEqual(len(book.toc), count)

        book = load('progit.epub')
        self.assertEqual(book.toc, build_toc([
            ('progit_split_000.html', 'Getting Started', 0),
            ('progit_split_008.html', 'Git Basics', 0),
            ('progit_split_017.html', 'Git Branching', 0),
            ('progit_split_025.html', 'Git on the Server', 0),
            ('progit_split_037.html', 'Distributed Git', 0),
            ('progit_split_042.html', 'Git Tools', 0),
            ('progit_split_051.html', 'Customizing Git', 0),
            ('progit_split_057.html', 'Git and Other Systems', 0),
            ('progit_split_061.html', 'Git Internals', 0),
        ]))

        # This book nests its chapters one level below the root entry.
        book = load('high-performance-computing-5.2.epub')
        self.assertEqual(book.toc, build_toc([
            ("index.html", "High Performance Computing", 0),
            ("pr01.html", "Introduction to the Connexions Edition", 1),
            ("pr02.html", "Introduction to High Performance Computing", 1),
            ("ch01.html", "1. Modern Computer Architectures", 1),
            ("ch02.html", "2. Programming and Tuning Software", 1),
            ("ch03.html", "3. Shared-Memory Parallel Processors", 1),
            ("ch04.html", "4. Scalable Parallel Processing", 1),
            ("ch05.html", "5. Appendixes", 1),
            ("ix01.html", "Index", 1),
            ("co01.html", "Attributions", 1),
            ("co02.html", "About Connexions", 1),
        ]))
コード例 #19
0
if __name__ == '__main__':
    # Command-line helper: pretty-print the metadata (table of contents
    # included, cover image skipped) of the EPUB given as first argument.
    import sys
    from pprint import pprint
    from epub_meta import get_epub_metadata

    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit('usage: %s EPUB_FILE' % sys.argv[0])

    # Despite its name, this is passed to get_epub_metadata as the path of
    # the EPUB to read.
    dirpath = sys.argv[1]
    data = get_epub_metadata(dirpath, read_cover_image=False, read_toc=True)
    pprint(data)