def get_metadata_(src, encoding=None):
    """Build a MetaInformation object from metadata embedded in HTML source.

    Every field is searched for in two places, in this order:

    1. an HTML comment containing ``FIELD="value"``
       (for example ``<!-- TITLE="Some book" -->``);
    2. the patterns returned by ``get_meta_regexp_`` for a list of candidate
       names (presumably ``<meta>`` tags -- confirm against that helper).

    The title additionally falls back to the ``<title>`` element.  Only the
    first 150000 characters of the document are examined.

    Meta data definitions as in
    http://www.mobileread.com/forums/showpost.php?p=712544&postcount=9

    :param src: HTML source.  Anything that is not ``unicode`` is decoded
        first: with ``encoding`` when one is given (undecodable bytes are
        replaced), otherwise via ``xml_to_unicode``.
    :param encoding: Optional encoding name used to decode ``src``.
    :return: The populated ``MetaInformation`` instance.
    """
    if not isinstance(src, unicode):
        if not encoding:
            src = xml_to_unicode(src)[0]
        else:
            src = src.decode(encoding, 'replace')
    src = src[:150000]  # Searching shouldn't take too long

    def from_comment(key, matched_quotes=False):
        """Return the value of ``KEY="..."`` inside an HTML comment, or None."""
        if matched_quotes:
            # The closing quote must be the same character as the opening
            # one, so the value may contain the other quote character.
            pattern = r'<!--.*?%s=(?P<q>[\'"])(.+?)(?P=q).*?-->' % key
            group = 2
        else:
            pattern = r'<!--.*?%s=[\'"]([^"\']+)[\'"].*?-->' % key
            group = 1
        found = re.search(pattern, src, re.DOTALL)
        return found.group(group) if found else None

    def from_meta(names):
        """Return group 1 of the first ``get_meta_regexp_`` pattern that matches."""
        for name in names:
            found = get_meta_regexp_(name).search(src)
            if found:
                return found.group(1)
        return None

    def lookup(key, names, matched_quotes=False):
        """Comment value if present, otherwise the first matching meta value."""
        value = from_comment(key, matched_quotes)
        if value is None:
            value = from_meta(names)
        return value

    # NOTE(review): ``\S+`` is greedy, so two entities with no whitespace
    # between them ("&amp;&lt;") are handed to entity_to_unicode as a single
    # match.  Left unchanged here; confirm whether that is intended.
    ent_pat = re.compile(r'&(\S+)?;')

    def decode_entities(text):
        return ent_pat.sub(entity_to_unicode, text)

    # Title
    title = lookup('TITLE', ('DC.title', 'DCTERMS.title', 'Title'),
                   matched_quotes=True)
    if not title:
        found = re.search('<title>([^<>]+?)</title>', src, re.IGNORECASE)
        if found:
            title = found.group(1)

    # Author -- only the comment form has its commas turned into semicolons.
    author = from_comment('AUTHOR', matched_quotes=True)
    if author is not None:
        author = author.replace(',', ';')
    else:
        author = from_meta(('Author', 'DC.creator.aut',
                            'DCTERMS.creator.aut', 'DC.creator'))

    # Create MetaInformation with Title and Author
    if title:
        title = decode_entities(title)
    if author:
        author = decode_entities(author)
    mi = MetaInformation(title, [author] if author else None)

    # Publisher
    publisher = lookup('PUBLISHER',
                       ('Publisher', 'DC.publisher', 'DCTERMS.publisher'),
                       matched_quotes=True)
    if publisher:
        mi.publisher = decode_entities(publisher)

    # ISBN -- keep only digits and the X check character.
    isbn = lookup('ISBN',
                  ('ISBN', 'DC.identifier.ISBN', 'DCTERMS.identifier.ISBN'))
    if isbn:
        mi.isbn = re.sub(r'[^0-9xX]', '', isbn)

    # LANGUAGE
    language = lookup('LANGUAGE', ('DC.language', 'DCTERMS.language'))
    if language:
        mi.language = language

    # PUBDATE
    pubdate = lookup('PUBDATE',
                     ('Pubdate', 'Date of publication', 'DC.date.published',
                      'DC.date.publication', 'DC.date.issued',
                      'DCTERMS.issued'))
    if pubdate:
        try:
            mi.pubdate = parse_date(pubdate)
        except Exception:
            # Best effort: an unparseable date is skipped, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            pass

    # TIMESTAMP
    timestamp = lookup('TIMESTAMP',
                       ('Timestamp', 'Date of creation', 'DC.date.created',
                        'DC.date.creation', 'DCTERMS.created'))
    if timestamp:
        try:
            mi.timestamp = parse_date(timestamp)
        except Exception:
            # Best effort, see PUBDATE above.
            pass

    # SERIES -- an index may be embedded in the name as "[<number>]".
    series = lookup('SERIES', ('Series',))
    if series:
        series_index = None
        index_match = re.search(r'\[([.0-9]+)\]', series)
        if index_match is not None:
            try:
                series_index = float(index_match.group(1))
            except ValueError:
                # Something like "[1.2.3]" -- the bracket is still stripped.
                pass
            series = series.replace(index_match.group(), '').strip()
        mi.series = decode_entities(series)
        if series_index is None:
            number_match = get_meta_regexp_("Seriesnumber").search(src)
            if number_match:
                try:
                    series_index = float(number_match.group(1))
                except (TypeError, ValueError):
                    pass
        if series_index is not None:
            mi.series_index = series_index

    # RATING
    rating = lookup('RATING', ('Rating',))
    if rating:
        try:
            score = float(rating)
        except (TypeError, ValueError):
            score = None
        if score is not None:
            if score < 0:
                score = 0
            if score > 5:
                # Presumably a 0-10 scale folded onto 0-5 -- TODO confirm.
                score /= 2.
            if score > 5:
                # Still out of range after halving: discard the value.
                score = 0
            mi.rating = score

    # COMMENTS
    comments = lookup('COMMENTS', ('Comments',))
    if comments:
        mi.comments = decode_entities(comments)

    # TAGS -- comma separated list.
    tags = lookup('TAGS', ('Tags',))
    if tags:
        mi.tags = [x.strip() for x in decode_entities(tags).split(",")]

    # Ready to return MetaInformation
    return mi
def _do_split(self, db, source_id, misource, splitepub, origlines, newspecs, deftitle=None):
    """Create a new library book holding a split-off part of an EPUB.

    A book entry is created first, with metadata copied from ``misource``
    according to ``prefs``; the EPUB file itself is written and attached
    afterwards (after the Edit Metadata dialog when that preference is on).

    :param db: Library database used to create and update the book.
    :param source_id: Id of the book being split.
    :param misource: Metadata of the source book.
    :param splitepub: Object whose ``write_split_epub`` produces the output.
    :param origlines: Indexable by line number; each entry has a ``'toc'``
        sequence used for the default title.
    :param newspecs: ``(linenums, changedtocs, checkedalways)`` triple.
    :param deftitle: Title for the new book; derived from the TOC or the
        source title (subject to ``prefs``) when empty.
    :return: None.  Returns early when ``self.has_lines(linenums)`` is false.
    """
    linenums, changedtocs, checkedalways = newspecs
    if not self.has_lines(linenums):
        return

    self.t = time.time()

    # Default title: first selected TOC entry, then "<source title> Split".
    if not deftitle and prefs['copytoctitle']:
        first_line = linenums[0]
        if first_line in changedtocs:
            # Edited TOC entries are already unicode.
            deftitle = changedtocs[first_line][0]
        elif len(origlines[first_line]['toc']) > 0:
            deftitle = unicode(origlines[first_line]['toc'][0])
    if not deftitle and prefs['copytitle']:
        deftitle = _("%s Split") % misource.title

    defauthors = misource.authors if prefs['copyauthors'] else None
    mi = MetaInformation(deftitle, defauthors)

    # Plain attribute copies, each gated by its own preference.
    simple_copies = (
        ('copytags', 'tags'),
        ('copylanguages', 'languages'),
        ('copyseries', 'series'),
        ('copydate', 'timestamp'),
        ('copyrating', 'rating'),
        ('copypubdate', 'pubdate'),
        ('copypublisher', 'publisher'),
    )
    for pref_name, field in simple_copies:
        if prefs[pref_name]:
            setattr(mi, field, getattr(misource, field))
    if prefs['copyidentifiers']:
        mi.set_identifiers(misource.get_identifiers())
    if prefs['copycomments'] and misource.comments:
        mi.comments = "<p>" + _("Split from:") + "</p>" + misource.comments

    book_id = db.create_book_entry(mi, add_duplicates=True)
    if prefs['copycover'] and misource.has_cover:
        db.set_cover(book_id, db.cover(source_id, index_is_id=True))

    self.t = time.time()

    # Copy the configured custom columns that exist in this library.
    custom_columns = self.gui.library_view.model().custom_columns
    for col, _unused_action in prefs['custom_cols'].iteritems():
        if col in custom_columns:
            col_label = custom_columns[col]['label']
            col_value = db.get_custom(source_id, label=col_label, index_is_id=True)
            if col_value:
                db.set_custom(book_id, col_value, label=col_label, commit=False)

    self.t = time.time()

    # Optional "source" column filled in from a template.
    source_col = prefs['sourcecol']
    if source_col != '' and source_col in custom_columns and prefs['sourcetemplate']:
        source_def = custom_columns[source_col]
        val = SafeFormat().safe_format(prefs['sourcetemplate'], misource,
                                       'EpubSplit Source Template Error', misource)
        if source_def['datatype'] == 'series':
            # Series columns get the running book number as their index.
            val = val + (" [%s]" % self.book_count)
        db.set_custom(book_id, val, label=source_def['label'], commit=False)
    self.book_count = self.book_count + 1
    db.commit()

    self.t = time.time()
    library_view = self.gui.library_view
    library_view.model().books_added(1)
    library_view.select_rows([book_id])

    self.t = time.time()
    editconfig_txt = _('You can enable or disable Edit Metadata in Preferences > Plugins > EpubSplit.')

    if prefs['editmetadata']:
        review_first_msg = _(
            "The book for the new Split EPUB has been created and default metadata filled in. "
            "However, the EPUB will *not* be created until after you've reviewed, edited, "
            "and closed the metadata dialog that follows. "
            "You can fill in the metadata yourself, or use download metadata for known books. \n"
            "If you download or add a cover image, it will be included in the generated EPUB.")
        confirm('\n' + review_first_msg + '\n\n' + editconfig_txt + '\n',
                'epubsplit_created_now_edit_again',
                self.gui)
        self.gui.iactions['Edit Metadata'].edit_metadata(False)

    try:
        QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
        self.t = time.time()
        self.gui.tags_view.recount()
        self.gui.status_bar.show_message(_('Splitting off from EPUB...'), 60000)

        # Re-read the metadata: it may have been edited in the dialog above.
        mi = db.get_metadata(book_id, index_is_id=True)
        outputepub = PersistentTemporaryFile(suffix='.epub')

        coverjpgpath = None
        if mi.has_cover:
            # grab the path to the real image.
            coverjpgpath = os.path.join(db.library_path,
                                        db.path(book_id, index_is_id=True),
                                        'cover.jpg')

        # Selected lines plus the always-included ones, de-duplicated, in order.
        outlist = sorted(set(linenums + checkedalways))
        splitepub.write_split_epub(outputepub,
                                   outlist,
                                   changedtocs=changedtocs,
                                   authoropts=mi.authors,
                                   titleopt=mi.title,
                                   descopt=mi.comments,
                                   tags=mi.tags,
                                   languages=mi.languages,
                                   coverjpgpath=coverjpgpath)

        self.t = time.time()
        db.add_format_with_hooks(book_id, 'EPUB', outputepub, index_is_id=True)

        self.t = time.time()
        self.gui.status_bar.show_message(_('Finished splitting off EPUB.'), 3000)
        self.gui.library_view.model().refresh_ids([book_id])
        self.gui.tags_view.recount()
        current = self.gui.library_view.currentIndex()
        self.gui.library_view.model().current_changed(current, self.previous)
    finally:
        QApplication.restoreOverrideCursor()

    if not prefs['editmetadata']:
        paragraphs = [
            _('<b><u>%s</u> by %s</b> has been created and default metadata filled in.')
            % (mi.title, ', '.join(mi.authors)),
            _('EpubSplit now skips the Edit Metadata step by default.'),
            editconfig_txt,
        ]
        confirm('<p>' + '</p><p>'.join(paragraphs) + '</p>',
                'epubsplit_created_now_no_edit_again',
                self.gui)
def _do_split(self, db, source_id, misource, splitepub, origlines, newspecs, deftitle=None):
    """Split selected sections of an EPUB off into a new library book.

    The new book entry is created with metadata copied from ``misource`` as
    allowed by ``prefs``.  The EPUB is then generated and attached, after the
    Edit Metadata dialog when ``prefs['editmetadata']`` is set.

    :param db: Library database used to create and update the book.
    :param source_id: Id of the book being split.
    :param misource: Metadata of the source book.
    :param splitepub: Object whose ``write_split_epub`` produces the output.
    :param origlines: Indexable by line number; each entry has a ``'toc'``
        sequence used for the default title.
    :param newspecs: ``(linenums, changedtocs, checkedalways)`` triple.
    :param deftitle: Title for the new book; derived from the TOC or the
        source title (subject to ``prefs``) when empty.
    :return: None.  Returns early when ``self.has_lines(linenums)`` is false.
    """
    linenums, changedtocs, checkedalways = newspecs
    if not self.has_lines(linenums):
        return

    self.t = time.time()

    # Pick a default title: first selected TOC entry, else "<title> Split".
    if not deftitle and prefs['copytoctitle']:
        lead = linenums[0]
        if lead in changedtocs:
            # Edited TOC entries are already unicode.
            deftitle = changedtocs[lead][0]
        elif len(origlines[lead]['toc']) > 0:
            deftitle = unicode(origlines[lead]['toc'][0])
    if not deftitle and prefs['copytitle']:
        deftitle = _("%s Split") % misource.title

    defauthors = None
    if prefs['copyauthors']:
        defauthors = misource.authors
    mi = MetaInformation(deftitle, defauthors)

    # (preference key, metadata attribute) pairs copied verbatim when enabled.
    for pref_key, attr in (('copytags', 'tags'),
                           ('copylanguages', 'languages'),
                           ('copyseries', 'series'),
                           ('copydate', 'timestamp'),
                           ('copyrating', 'rating'),
                           ('copypubdate', 'pubdate'),
                           ('copypublisher', 'publisher')):
        if prefs[pref_key]:
            setattr(mi, attr, getattr(misource, attr))
    if prefs['copyidentifiers']:
        mi.set_identifiers(misource.get_identifiers())
    if prefs['copycomments'] and misource.comments:
        mi.comments = "<p>" + _("Split from:") + "</p>" + misource.comments

    book_id = db.create_book_entry(mi, add_duplicates=True)
    if prefs['copycover'] and misource.has_cover:
        source_cover = db.cover(source_id, index_is_id=True)
        db.set_cover(book_id, source_cover)

    self.t = time.time()

    # Carry over configured custom columns that exist in this library.
    custom_columns = self.gui.library_view.model().custom_columns
    for col, _unused_action in six.iteritems(prefs['custom_cols']):
        if col not in custom_columns:
            continue
        label = custom_columns[col]['label']
        value = db.get_custom(source_id, label=label, index_is_id=True)
        if value:
            db.set_custom(book_id, value, label=label, commit=False)

    self.t = time.time()

    # Optional "source" column, rendered from the configured template.
    sourcecol = prefs['sourcecol']
    if sourcecol != '' and sourcecol in custom_columns and prefs['sourcetemplate']:
        val = SafeFormat().safe_format(prefs['sourcetemplate'], misource,
                                       'EpubSplit Source Template Error',
                                       misource)
        label = custom_columns[sourcecol]['label']
        if custom_columns[sourcecol]['datatype'] == 'series':
            # Series columns get the running book number as their index.
            val = val + (" [%s]" % self.book_count)
        db.set_custom(book_id, val, label=label, commit=False)
    self.book_count = self.book_count + 1
    db.commit()

    self.t = time.time()
    self.gui.library_view.model().books_added(1)
    self.gui.library_view.select_rows([book_id])

    self.t = time.time()
    editconfig_txt = _(
        'You can enable or disable Edit Metadata in Preferences > Plugins > EpubSplit.')

    if prefs['editmetadata']:
        explanation = _(
            "The book for the new Split EPUB has been created and default metadata filled in. "
            "However, the EPUB will *not* be created until after you've reviewed, edited, "
            "and closed the metadata dialog that follows. "
            "You can fill in the metadata yourself, or use download metadata for known books. \n"
            "If you download or add a cover image, it will be included in the generated EPUB.")
        confirm('\n' + explanation + '\n\n' + editconfig_txt + '\n',
                'epubsplit_created_now_edit_again', self.gui)
        self.gui.iactions['Edit Metadata'].edit_metadata(False)

    try:
        QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
        self.t = time.time()
        self.gui.tags_view.recount()
        self.gui.status_bar.show_message(_('Splitting off from EPUB...'), 60000)

        # Re-read the metadata: it may have been edited in the dialog above.
        mi = db.get_metadata(book_id, index_is_id=True)
        outputepub = PersistentTemporaryFile(suffix='.epub')

        if mi.has_cover:
            # grab the path to the real image.
            coverjpgpath = os.path.join(db.library_path,
                                        db.path(book_id, index_is_id=True),
                                        'cover.jpg')
        else:
            coverjpgpath = None

        # Selected lines plus the always-included ones, unique and ordered.
        outlist = sorted(set(linenums + checkedalways))
        splitepub.write_split_epub(outputepub,
                                   outlist,
                                   changedtocs=changedtocs,
                                   authoropts=mi.authors,
                                   titleopt=mi.title,
                                   descopt=mi.comments,
                                   tags=mi.tags,
                                   languages=mi.languages,
                                   coverjpgpath=coverjpgpath)

        self.t = time.time()
        db.add_format_with_hooks(book_id, 'EPUB', outputepub, index_is_id=True)

        self.t = time.time()
        self.gui.status_bar.show_message(_('Finished splitting off EPUB.'), 3000)
        self.gui.library_view.model().refresh_ids([book_id])
        self.gui.tags_view.recount()
        current = self.gui.library_view.currentIndex()
        self.gui.library_view.model().current_changed(current, self.previous)
        if self.gui.cover_flow:
            self.gui.cover_flow.dataChanged()
    finally:
        QApplication.restoreOverrideCursor()

    if not prefs['editmetadata']:
        created_line = _(
            '<b><u>%s</u> by %s</b> has been created and default metadata filled in.'
        ) % (mi.title, ', '.join(mi.authors))
        skipped_line = _('EpubSplit now skips the Edit Metadata step by default.')
        body = '</p><p>'.join([created_line, skipped_line, editconfig_txt])
        confirm('<p>' + body + '</p>',
                'epubsplit_created_now_no_edit_again', self.gui)
def _do_split(self, db, source_id, misource, splitepub, newspecs, deftitle=None, editmeta=True):
    """Create a sample book from selected lines of a source EPUB.

    A new book entry is created with metadata copied from ``misource`` as
    allowed by ``prefs``; the split EPUB is then written and attached to it.

    :param db: Library database used to create and update the book.
    :param source_id: Id of the book being split.
    :param misource: Metadata of the source book.
    :param splitepub: Object whose ``write_split_epub`` produces the output.
    :param newspecs: ``(linenums, changedtocs)`` pair.
    :param deftitle: Title for the new book; built from the source title
        when empty and ``prefs['copytitle']`` is set.
    :param editmeta: Currently unused -- the confirmation / Edit Metadata
        step it used to control is disabled.
    :return: None.
    """
    linenums, changedtocs = newspecs
    self.t = time.time()

    if not deftitle and prefs['copytitle']:
        deftitle = _("نمونه %s") % misource.title
    defauthors = misource.authors if prefs['copyauthors'] else None
    mi = MetaInformation(deftitle, defauthors)

    # Plain attribute copies, each gated by its own preference.
    for pref_key, attr in (('copytags', 'tags'),
                           ('copylanguages', 'languages'),
                           ('copyseries', 'series'),
                           ('copydate', 'timestamp'),
                           ('copyrating', 'rating'),
                           ('copypubdate', 'pubdate'),
                           ('copypublisher', 'publisher')):
        if prefs[pref_key]:
            setattr(mi, attr, getattr(misource, attr))
    if prefs['copyidentifiers']:
        mi.set_identifiers(misource.get_identifiers())
    if prefs['copycomments'] and misource.comments:
        mi.comments = _("Split from:") + "\n\n" + misource.comments

    book_id = db.create_book_entry(mi, add_duplicates=True)
    if prefs['copycover'] and misource.has_cover:
        db.set_cover(book_id, db.cover(source_id, index_is_id=True))

    self.t = time.time()

    # Copy the configured custom columns that exist in this library.
    custom_columns = self.gui.library_view.model().custom_columns
    for col, _unused_action in prefs['custom_cols'].iteritems():
        if col in custom_columns:
            col_label = custom_columns[col]['label']
            col_value = db.get_custom(source_id, label=col_label, index_is_id=True)
            if col_value:
                db.set_custom(book_id, col_value, label=col_label, commit=False)

    self.t = time.time()

    # Optional "source" column, rendered from the configured template.
    source_col = prefs['sourcecol']
    if source_col != '' and source_col in custom_columns and prefs['sourcetemplate']:
        val = SafeFormat().safe_format(prefs['sourcetemplate'], misource,
                                       'EpubSplit Source Template Error', misource)
        db.set_custom(book_id, val, label=custom_columns[source_col]['label'],
                      commit=False)
    db.commit()

    self.t = time.time()
    library_view = self.gui.library_view
    library_view.model().books_added(1)
    library_view.select_rows([book_id])

    # The confirmation prompt and Edit Metadata dialog are disabled in this
    # variant, so the EPUB is generated straight away.
    self.t = time.time()
    self.gui.tags_view.recount()
    self.gui.status_bar.show_message(_('فایل نمونه ساخته شد'), 60000)

    new_mi = db.get_metadata(book_id, index_is_id=True)
    outputepub = PersistentTemporaryFile(suffix='.epub')
    # The cover file lookup is disabled; no cover path is passed on.
    coverjpgpath = None
    splitepub.write_split_epub(outputepub,
                               linenums,
                               changedtocs=changedtocs,
                               authoropts=new_mi.authors,
                               titleopt=new_mi.title,
                               descopt=new_mi.comments,
                               tags=new_mi.tags,
                               languages=new_mi.languages,
                               coverjpgpath=coverjpgpath)

    self.t = time.time()
    db.add_format_with_hooks(book_id, 'EPUB', outputepub, index_is_id=True)

    self.t = time.time()
    self.gui.status_bar.show_message(_('Finished splitting off EPUB.'), 3000)
    self.gui.library_view.model().refresh_ids([book_id])
    self.gui.tags_view.recount()
    current = self.gui.library_view.currentIndex()
    self.gui.library_view.model().current_changed(current, self.previous)
def get_metadata_(src, encoding=None):
    """Build a MetaInformation object from metadata embedded in HTML source.

    Every field is searched for in two places, in this order:

    1. an HTML comment containing ``FIELD="value"``
       (for example ``<!-- TITLE="Some book" -->``);
    2. the patterns returned by ``get_meta_regexp_`` for a list of candidate
       names (presumably ``<meta>`` tags -- confirm against that helper).

    The title additionally falls back to the ``<title>`` element.  Only the
    first 150000 characters of the document are examined.

    Meta data definitions as in
    http://www.mobileread.com/forums/showpost.php?p=712544&postcount=9

    :param src: HTML source.  Anything that is not ``unicode`` is decoded
        first: with ``encoding`` when one is given (undecodable bytes are
        replaced), otherwise via ``xml_to_unicode``.
    :param encoding: Optional encoding name used to decode ``src``.
    :return: The populated ``MetaInformation`` instance.
    """
    if not isinstance(src, unicode):
        if not encoding:
            src = xml_to_unicode(src)[0]
        else:
            src = src.decode(encoding, "replace")
    src = src[:150000]  # Searching shouldn't take too long

    def from_comment(key, matched_quotes=False):
        """Return the value of ``KEY="..."`` inside an HTML comment, or None."""
        if matched_quotes:
            # The closing quote must be the same character as the opening
            # one, so the value may contain the other quote character.
            pattern = r'<!--.*?%s=(?P<q>[\'"])(.+?)(?P=q).*?-->' % key
            group = 2
        else:
            pattern = r'<!--.*?%s=[\'"]([^"\']+)[\'"].*?-->' % key
            group = 1
        found = re.search(pattern, src, re.DOTALL)
        return found.group(group) if found else None

    def from_meta(names):
        """Return group 1 of the first ``get_meta_regexp_`` pattern that matches."""
        for name in names:
            found = get_meta_regexp_(name).search(src)
            if found:
                return found.group(1)
        return None

    def lookup(key, names, matched_quotes=False):
        """Comment value if present, otherwise the first matching meta value."""
        value = from_comment(key, matched_quotes)
        if value is None:
            value = from_meta(names)
        return value

    # NOTE(review): ``\S+`` is greedy, so two entities with no whitespace
    # between them ("&amp;&lt;") are handed to entity_to_unicode as a single
    # match.  Left unchanged here; confirm whether that is intended.
    ent_pat = re.compile(r"&(\S+)?;")

    def decode_entities(text):
        return ent_pat.sub(entity_to_unicode, text)

    # Title
    title = lookup("TITLE", ("DC.title", "DCTERMS.title", "Title"), matched_quotes=True)
    if not title:
        found = re.search("<title>([^<>]+?)</title>", src, re.IGNORECASE)
        if found:
            title = found.group(1)

    # Author -- only the comment form has its commas turned into semicolons.
    author = from_comment("AUTHOR", matched_quotes=True)
    if author is not None:
        author = author.replace(",", ";")
    else:
        author = from_meta(("Author", "DC.creator.aut", "DCTERMS.creator.aut", "DC.creator"))

    # Create MetaInformation with Title and Author
    if title:
        title = decode_entities(title)
    if author:
        author = decode_entities(author)
    mi = MetaInformation(title, [author] if author else None)

    # Publisher
    publisher = lookup(
        "PUBLISHER",
        ("Publisher", "DC.publisher", "DCTERMS.publisher"),
        matched_quotes=True,
    )
    if publisher:
        mi.publisher = decode_entities(publisher)

    # ISBN -- keep only digits and the X check character.
    isbn = lookup("ISBN", ("ISBN", "DC.identifier.ISBN", "DCTERMS.identifier.ISBN"))
    if isbn:
        mi.isbn = re.sub(r"[^0-9xX]", "", isbn)

    # LANGUAGE
    language = lookup("LANGUAGE", ("DC.language", "DCTERMS.language"))
    if language:
        mi.language = language

    # PUBDATE
    pubdate = lookup(
        "PUBDATE",
        (
            "Pubdate",
            "Date of publication",
            "DC.date.published",
            "DC.date.publication",
            "DC.date.issued",
            "DCTERMS.issued",
        ),
    )
    if pubdate:
        try:
            mi.pubdate = parse_date(pubdate)
        except Exception:
            # Best effort: an unparseable date is skipped, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            pass

    # TIMESTAMP
    timestamp = lookup(
        "TIMESTAMP",
        (
            "Timestamp",
            "Date of creation",
            "DC.date.created",
            "DC.date.creation",
            "DCTERMS.created",
        ),
    )
    if timestamp:
        try:
            mi.timestamp = parse_date(timestamp)
        except Exception:
            # Best effort, see PUBDATE above.
            pass

    # SERIES -- an index may be embedded in the name as "[<number>]".
    series = lookup("SERIES", ("Series",))
    if series:
        series_index = None
        index_match = re.search(r"\[([.0-9]+)\]", series)
        if index_match is not None:
            try:
                series_index = float(index_match.group(1))
            except ValueError:
                # Something like "[1.2.3]" -- the bracket is still stripped.
                pass
            series = series.replace(index_match.group(), "").strip()
        mi.series = decode_entities(series)
        if series_index is None:
            number_match = get_meta_regexp_("Seriesnumber").search(src)
            if number_match:
                try:
                    series_index = float(number_match.group(1))
                except (TypeError, ValueError):
                    pass
        if series_index is not None:
            mi.series_index = series_index

    # RATING
    rating = lookup("RATING", ("Rating",))
    if rating:
        try:
            score = float(rating)
        except (TypeError, ValueError):
            score = None
        if score is not None:
            if score < 0:
                score = 0
            if score > 5:
                # Presumably a 0-10 scale folded onto 0-5 -- TODO confirm.
                score /= 2.0
            if score > 5:
                # Still out of range after halving: discard the value.
                score = 0
            mi.rating = score

    # COMMENTS
    comments = lookup("COMMENTS", ("Comments",))
    if comments:
        mi.comments = decode_entities(comments)

    # TAGS -- comma separated list.
    tags = lookup("TAGS", ("Tags",))
    if tags:
        mi.tags = [x.strip() for x in decode_entities(tags).split(",")]

    # Ready to return MetaInformation
    return mi