def parse_exported_highlights(self, raw, log_failure=True):
    """
    Extract highlights from a pasted GoodReader Annotation summary email and
    store them against the book currently selected in the library view.

    The summary is expected to start with a 'File: <title>' line, contain one
    '--- Page N ---' marker per annotated page, and end with the line
    '(report generated by GoodReader)'.

    raw:         the pasted summary text
    log_failure: when True, a failed parse is logged (including the raw
                 text) and reported to the user in a warning dialog

    Return True if no problems
    Return False if error
    """
    # Create the annotations, books table as needed
    self.annotations_db = "%s_imported_annotations" % self.app_name_
    self.create_annotations_table(self.annotations_db)
    self.books_db = "%s_imported_books" % self.app_name_
    self.create_books_table(self.books_db)

    self.annotated_book_list = []
    self.selected_books = None

    # Generate the book metadata from the selected book
    row = self.opts.gui.library_view.currentIndex()
    book_id = self.opts.gui.library_view.model().id(row)
    db = self.opts.gui.current_db
    mi = db.get_metadata(book_id, index_is_id=True)

    # Any malformed summary surfaces as an exception somewhere in this block
    # (typically AttributeError from a failed match or IndexError from
    # running off the end of the text) and is reported once, below.
    try:
        # Grab the title from the front of raw
        title = re.match(r'(?m)File: (?P<title>.*)$', raw).group('title')

        # Populate a BookStruct
        book_mi = BookStruct()
        book_mi.active = True
        book_mi.author = 'Unknown'
        book_mi.book_id = mi.id
        book_mi.title = title
        book_mi.uuid = None
        book_mi.last_update = time.mktime(time.localtime())
        book_mi.reader_app = self.app_name
        book_mi.cid = mi.id

        # Line classifiers, compiled once instead of once per line
        page_marker = re.compile(r'--- (Page \w+) ---')
        any_prefix = re.compile('^(?P<ann_type>{0})'.format(
            '|'.join(self.ANNOTATION_TYPES + self.SKIP_TYPES)))
        ann_prefix = re.compile('^(?P<ann_type>{0})'.format(
            '|'.join(self.ANNOTATION_TYPES)))

        gr_annotations = raw.split('\n')
        num_lines = len(gr_annotations)
        highlights = {}

        # Find the first annotation
        i = 0
        line = gr_annotations[i]
        while not line.startswith('--- Page'):
            i += 1
            line = gr_annotations[i]

        while i < num_lines and not line.startswith('(report generated by GoodReader)'):
            # Extract the page number
            page_num = page_marker.search(line)
            if page_num:
                page_num = page_num.group(1)

                # Extract the highlight
                i += 1
                line = gr_annotations[i]

                prefix = None
                while True:
                    prefix = any_prefix.search(line)
                    if prefix and prefix.group('ann_type') in self.SKIP_TYPES:
                        # Skipped type: discard lines up to the next
                        # importable annotation type
                        i += 1
                        line = gr_annotations[i]
                        while not ann_prefix.search(line):
                            i += 1
                            line = gr_annotations[i]
                        continue
                    elif prefix:
                        break
                    else:
                        i += 1
                        line = gr_annotations[i]

                annotation = self._extract_highlight(line, prefix.group('ann_type'))
                annotation.page_num = page_num

                # Get the annotation(s)
                i += 1
                line = gr_annotations[i]
                ann = ''
                while i < num_lines \
                        and not line.startswith('--- Page') \
                        and not line.startswith('(report generated by GoodReader)'):

                    if line:
                        prefix = any_prefix.search(line)
                        if prefix and prefix.group('ann_type') in self.SKIP_TYPES:
                            # Continue until next ann_type
                            i += 1
                            line = gr_annotations[i]
                            while not ann_prefix.search(line):
                                i += 1
                                if i == num_lines:
                                    break
                                line = gr_annotations[i]
                            continue
                        elif prefix:
                            # Additional highlight on the same page
                            # write current annotation, start new annotation
                            self._store_annotation(highlights, annotation)
                            annotation = self._extract_highlight(line, prefix.group('ann_type'))
                            annotation.page_num = page_num
                            annotation.ann_type = prefix.group('ann_type')
                            ann = ''
                            i += 1
                            line = gr_annotations[i]
                            continue

                        if not ann:
                            ann = line
                        else:
                            ann += '\n' + line
                    i += 1
                    line = gr_annotations[i]
                    annotation.ann = ann

                # Back up so that the next line is '--- Page' or '(report generated'
                i -= 1
                self._store_annotation(highlights, annotation)

            i += 1
            if i == num_lines:
                break
            line = gr_annotations[i]
    except Exception:
        # 'except Exception' rather than a bare 'except' so that
        # KeyboardInterrupt/SystemExit are not mistaken for a parse failure
        if log_failure:
            self._log(" unable to parse GoodReader Annotation summary")
            self._log("{:~^80}".format(" Imported Annotation summary "))
            self._log(raw)
            self._log("{:~^80}".format(" end imported Annotations summary "))
            import traceback
            traceback.print_exc()
            msg = ('Unable to parse Annotation summary from %s. ' % self.app_name +
                   'Paste entire contents of emailed summary.')
            MessageBox(MessageBox.WARNING,
                       'Error importing annotations',
                       msg,
                       show_copy_button=False,
                       parent=self.opts.gui).exec_()
            self._log_location("WARNING: %s" % msg)
        return False

    # Finalize book_mi
    book_mi.annotations = len(highlights)
    # Add book to books_db
    self.add_to_books_db(self.books_db, book_mi)
    self.annotated_book_list.append(book_mi)

    # sorted() over the dict works on Python 2 and 3; dict.iterkeys() does not
    for dt in sorted(highlights):
        entry = highlights[dt]
        highlight_text = None
        if 'text' in entry:
            highlight_text = entry['text']
        note_text = None
        if 'note' in entry:
            note_text = entry['note']

        # Populate an AnnotationStruct
        a_mi = AnnotationStruct()
        a_mi.annotation_id = dt
        a_mi.book_id = book_mi['book_id']
        a_mi.highlight_color = entry['color']
        a_mi.highlight_text = highlight_text
        a_mi.location = entry['page']
        a_mi.last_modification = dt
        a_mi.note_text = note_text

        # Location sort: roman-numeral pages sort ahead of arabic pages
        # because their whole part is 0
        page_literal = re.match(r'^Page (?P<page>[0-9ivx]+).*$', a_mi.location).group('page')
        if re.match('[IXVL]', page_literal.upper()):
            whole = 0
            decimal = self._roman_to_int(page_literal)
        else:
            whole = int(page_literal)
            decimal = 0
        a_mi.location_sort = "%05d.%05d" % (whole, decimal)

        # Add annotation
        self.add_to_annotations_db(self.annotations_db, a_mi)
        self.update_book_last_annotation(self.books_db, dt, book_mi['book_id'])

    # Update the timestamp
    self.update_timestamp(self.annotations_db)
    self.update_timestamp(self.books_db)
    self.commit()

    return True
def parse_exported_highlights(self, raw):
    """
    Import the highlights held in self.highlights and attach them to the
    book currently selected in the calibre library view.

    A BookStruct describes the book. Starred properties are the minimum
    required:
        *active:      [True|False]
        *author:      "John Smith"
         author_sort: (if known)
        *book_id:     an int uniquely identifying the book; highlights are
                      tied to their book through it
         genre:       "Fiction" (if known)
        *title:       "The Story of John Smith"
         title_sort:  "Story of John Smith, The" (if known)
         uuid:        calibre's uuid for this book, if known

    An AnnotationStruct describes one highlight. Starred properties are the
    minimum required; of the dashed ones, either or both may be present:
         annotation_id:     an int uniquely identifying the annotation
        *book_id:           the book this annotation belongs to
         highlight_color:   [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
        -highlight_text:    list of paragraphs making up the highlight
         last_modification: timestamp of the annotation
         location:          location of the highlight in the book
        -note_text:         list of paragraphs making up the note
        *timestamp:         unique creation/modification time of the highlight

    raw is accepted but not consulted here; title and author are fixed
    placeholder values. Returns True.
    """
    self._log("%s:parse_exported_highlight()" % self.app_name)

    # Make sure both import tables exist before anything is written
    self.annotations_db = "%s_imported_annotations" % self.app_name_
    self.create_annotations_table(self.annotations_db)
    self.books_db = "%s_imported_books" % self.app_name_
    self.create_books_table(self.books_db)

    self.annotated_book_list = []
    self.selected_books = None

    # Metadata of the book selected in the library view
    gui = self.opts.gui
    current_row = gui.library_view.currentIndex()
    selected_id = gui.library_view.model().id(current_row)
    mi = gui.current_db.get_metadata(selected_id, index_is_id=True)

    # Describe the book; author and title are the minimum to populate
    book_mi = BookStruct()
    book_mi.active = True
    book_mi.author = "John Smith"
    book_mi.book_id = mi.id
    book_mi.title = "A Book With Some Exported Annotations"
    book_mi.uuid = None
    book_mi.last_update = time.mktime(time.localtime())
    book_mi.reader_app = self.app_name
    book_mi.cid = mi.id
    book_mi.annotations = len(self.highlights)

    # Store the highlights oldest-first
    for stamp in sorted(self.highlights.keys()):
        details = self.highlights[stamp]
        book_mi.last_update = stamp

        ann_mi = AnnotationStruct()

        # Required items
        ann_mi.book_id = book_mi['book_id']
        ann_mi.last_modification = stamp

        # Optional items copied through unchanged
        for field in ('annotation_id', 'highlight_color'):
            if field in details:
                setattr(ann_mi, field, details[field])

        # Optional paragraph lists, flattened to newline-separated text
        for field in ('highlight_text', 'note_text'):
            if field in details:
                setattr(ann_mi, field, '\n'.join(details[field]))

        self.add_to_annotations_db(self.annotations_db, ann_mi)

        # One progress-bar tick per stored annotation
        self.opts.pb.increment()

        # Keep last_annotation in books_db in step with what was stored
        self.update_book_last_annotation(self.books_db, stamp, ann_mi.book_id)

    # Register the book itself
    self.add_to_books_db(self.books_db, book_mi)
    self.annotated_book_list.append(book_mi)

    # Stamp both tables and persist
    for table in (self.annotations_db, self.books_db):
        self.update_timestamp(table)
    self.commit()

    return True
def parse_exported_highlights(self, raw, log_failure=True):
    """
    Extract highlights from a pasted GoodReader Annotation summary email and
    store them against the book currently selected in the library view.
    Each parsing step is traced through self._log.

    The summary is expected to start with a 'File: <title>' line, contain one
    '--- Page N ---' marker per annotated page, and end with the line
    '(report generated by GoodReader)'.

    raw:         the pasted summary text
    log_failure: when True, a failed parse additionally logs the raw text
                 and reports the failure to the user in a warning dialog

    Return True if no problems
    Return False if error
    """
    # Create the annotations, books table as needed
    self.annotations_db = "%s_imported_annotations" % self.app_name_
    self.create_annotations_table(self.annotations_db)
    self.books_db = "%s_imported_books" % self.app_name_
    self.create_books_table(self.books_db)

    self.annotated_book_list = []
    self.selected_books = None

    self._log("raw highlights: {0}".format(raw))

    # Generate the book metadata from the selected book
    row = self.opts.gui.library_view.currentIndex()
    book_id = self.opts.gui.library_view.model().id(row)
    db = self.opts.gui.current_db
    mi = db.get_metadata(book_id, index_is_id=True)

    # Any malformed summary surfaces as an exception somewhere in this block
    # (typically AttributeError from a failed match or IndexError from
    # running off the end of the text) and is reported once, below.
    try:
        # Grab the title from the front of raw
        title = re.match(r'(?m)File: (?P<title>.*)$', raw).group('title')
        self._log("title='{0}".format(title))

        # Populate a BookStruct
        book_mi = BookStruct()
        book_mi.active = True
        book_mi.author = 'Unknown'
        book_mi.book_id = mi.id
        book_mi.title = title
        book_mi.uuid = None
        book_mi.last_update = time.mktime(time.localtime())
        book_mi.reader_app = self.app_name
        book_mi.cid = mi.id

        # Line classifiers, compiled once instead of once per line.
        # The page marker is a raw string: '\w' in a plain literal is an
        # invalid escape sequence.
        page_marker = re.compile(r'--- (Page \w+) ---')
        any_prefix = re.compile('^(?P<ann_type>{0})'.format(
            '|'.join(self.ANNOTATION_TYPES + self.SKIP_TYPES)))
        ann_prefix = re.compile('^(?P<ann_type>{0})'.format(
            '|'.join(self.ANNOTATION_TYPES)))

        gr_annotations = raw.split('\n')
        num_lines = len(gr_annotations)
        highlights = {}

        # Find the first annotation
        i = 0
        line = gr_annotations[i]
        self._log("Looking for Page: Line number={0} line='{1}'".format(i, line))
        while not line.startswith('--- Page'):
            # Header lines ahead of the first page marker are expected, so
            # skipping one is not logged as a parse failure
            i += 1
            line = gr_annotations[i]
            self._log("Looking for Page: Line number={0} line='{1}'".format(i, line))

        while i < num_lines and not line.startswith('(report generated by GoodReader)'):
            # Extract the page number
            page_num = page_marker.search(line)
            self._log("regex result: page_num={0}".format(page_num))
            if page_num:
                page_num = page_num.group(1)
                self._log("page_num={0}".format(page_num))

                # Extract the highlight
                i += 1
                line = gr_annotations[i]
                self._log("Looking for annotation start: Line number={0} line='{1}'".format(i, line))

                prefix = None
                while True:
                    prefix = any_prefix.search(line)
                    self._log("Searched for prefix={0}".format(prefix))
                    if prefix and prefix.group('ann_type') in self.SKIP_TYPES:
                        # Skipped type: discard lines up to the next
                        # importable annotation type
                        i += 1
                        line = gr_annotations[i]
                        self._log("Looking for annotation start: Line number={0} line='{1}'".format(i, line))
                        while not ann_prefix.search(line):
                            i += 1
                            line = gr_annotations[i]
                            self._log("Looking for annotation start after a SKIP type: Line number={0} line='{1}'".format(i, line))
                        continue
                    elif prefix:
                        self._log("Have annotation start: Line number={0} line='{1}' prefix={2}".format(i, line, prefix))
                        break
                    else:
                        i += 1
                        line = gr_annotations[i]
                        self._log("Looking for annotation start 2: Line number={0} line='{1}'".format(i, line))

                annotation = self._extract_highlight(line, prefix.group('ann_type'))
                annotation.page_num = page_num
                self._log("Started annotation: page_num={0} annotation='{1}'".format(page_num, annotation))

                # Get the annotation(s)
                i += 1
                line = gr_annotations[i]
                self._log("Reading annotation text 1: Line number={0} line='{1}'".format(i, line))
                ann = ''
                while i < num_lines \
                        and not line.startswith('--- Page') \
                        and not line.startswith('(report generated by GoodReader)'):

                    if line:
                        prefix = any_prefix.search(line)
                        if prefix and prefix.group('ann_type') in self.SKIP_TYPES:
                            # Continue until next ann_type
                            i += 1
                            line = gr_annotations[i]
                            while not ann_prefix.search(line):
                                i += 1
                                if i == num_lines:
                                    break
                                line = gr_annotations[i]
                            continue
                        elif prefix:
                            # Additional highlight on the same page
                            # write current annotation, start new annotation
                            self._store_annotation(highlights, annotation)
                            annotation = self._extract_highlight(line, prefix.group('ann_type'))
                            annotation.page_num = page_num
                            annotation.ann_type = prefix.group('ann_type')
                            ann = ''
                            i += 1
                            line = gr_annotations[i]
                            continue

                        if not ann:
                            ann = line
                        else:
                            ann += '\n' + line
                    i += 1
                    line = gr_annotations[i]
                    annotation.ann = ann

                # Back up so that the next line is '--- Page' or '(report generated'
                i -= 1
                self._store_annotation(highlights, annotation)

            i += 1
            if i == num_lines:
                break
            line = gr_annotations[i]
    except Exception as e:
        import traceback
        self._log("Exception parsing GoodReader Annotation summary: %s" % e)
        # The traceback is printed once, whether or not log_failure is set
        traceback.print_exc()
        if log_failure:
            self._log(" unable to parse GoodReader Annotation summary")
            self._log("{:~^80}".format(" Imported Annotation summary "))
            self._log(raw)
            self._log("{:~^80}".format(" end imported Annotations summary "))
            msg = ('Unable to parse Annotation summary from %s. ' % self.app_name +
                   'Paste entire contents of emailed summary.')
            MessageBox(MessageBox.WARNING,
                       'Error importing annotations',
                       msg,
                       show_copy_button=False,
                       parent=self.opts.gui).exec_()
            self._log_location("WARNING: %s" % msg)
        return False

    # Finalize book_mi
    book_mi.annotations = len(highlights)
    # Add book to books_db
    self.add_to_books_db(self.books_db, book_mi)
    self.annotated_book_list.append(book_mi)

    for dt in sorted(highlights):
        entry = highlights[dt]
        highlight_text = None
        if 'text' in entry:
            highlight_text = entry['text']
        note_text = None
        if 'note' in entry:
            note_text = entry['note']

        # Populate an AnnotationStruct
        a_mi = AnnotationStruct()
        a_mi.annotation_id = dt
        a_mi.book_id = book_mi['book_id']
        a_mi.highlight_color = entry['color']
        a_mi.highlight_text = highlight_text
        a_mi.location = entry['page']
        a_mi.last_modification = dt
        a_mi.note_text = note_text

        # Location sort: roman-numeral pages sort ahead of arabic pages
        # because their whole part is 0
        page_literal = re.match(r'^Page (?P<page>[0-9ivx]+).*$', a_mi.location).group('page')
        if re.match('[IXVL]', page_literal.upper()):
            whole = 0
            decimal = self._roman_to_int(page_literal)
        else:
            whole = int(page_literal)
            decimal = 0
        a_mi.location_sort = "%05d.%05d" % (whole, decimal)

        # Add annotation
        self.add_to_annotations_db(self.annotations_db, a_mi)
        self.update_book_last_annotation(self.books_db, dt, book_mi['book_id'])

    # Update the timestamp
    self.update_timestamp(self.annotations_db)
    self.update_timestamp(self.books_db)
    self.commit()

    return True
def parse_exported_highlights(self, raw, log_failure=True):
    """
    Extract highlights from a pasted Annotations summary and add them to the
    book currently selected in the calibre library.

    Expected layout of raw, one item per line:
        line 1: title
        line 2: author
        line 3: publisher (not used)
        then, repeated per annotation:
            <timestamp> (Page <n>...)
            <highlight text>
            Notes: <note text>        (optional)

    Each annotation becomes an AnnotationStruct with a fixed highlight color
    of 'Yellow'. Timestamps are moved to 12:00 of their day and, if already
    used by an earlier annotation, advanced in 60 second steps until unique,
    since the timestamp is the key the annotations are stored under.

    raw:         the pasted summary text
    log_failure: when True, a failed parse is logged (including the raw
                 text) and reported to the user in a warning dialog

    Return True if successful, False if the summary could not be parsed.
    """
    # Create the annotations, books table as needed
    self.annotations_db = "%s_imported_annotations" % self.app_name_
    self.create_annotations_table(self.annotations_db)
    self.books_db = "%s_imported_books" % self.app_name_
    self.create_books_table(self.books_db)

    self.annotated_book_list = []
    self.selected_books = None

    # Generate the book metadata from the selected book
    row = self.opts.gui.library_view.currentIndex()
    book_id = self.opts.gui.library_view.model().id(row)
    db = self.opts.gui.current_db
    mi = db.get_metadata(book_id, index_is_id=True)

    header_re = re.compile(r'^(?P<timestamp>.*) \((?P<location>Page .*)\)')

    try:
        lines = raw.split('\n')
        if len(lines) < 5:
            raise AnnotationsException("Invalid annotations summary")
        annotations = {}

        # Title and author come from the first two lines; the third line
        # (publisher) is skipped
        title = lines[0]
        author = lines[1]
        index = 3

        # Next line should be the first timestamp/location
        while index < len(lines):
            tsl = header_re.match(lines[index])
            if not tsl:
                # Without this the loop would spin forever on a line that
                # is not a timestamp/location header
                raise AnnotationsException("Invalid annotations summary")

            ts = tsl.group('timestamp')
            isoformat = parse_date(ts, as_utc=False)
            isoformat = isoformat.replace(hour=12)
            timestamp = mktime(isoformat.timetuple())
            while timestamp in annotations:
                timestamp += 60

            location = tsl.group('location')
            index += 1

            # Continue with highlight
            highlight_text = lines[index]
            index += 1

            # Next line is either Notes:, a new timestamp/location, or the
            # end of the summary (no trailing newline)
            at_end = index >= len(lines)
            note = None if at_end else re.match(r'^Notes: (?P<note_text>.*)', lines[index])
            note_text = None
            if note:
                note_text = note.group('note_text')
                index += 1

            ann = AnnotationStruct()
            ann.book_id = mi.id
            ann.annotation_id = index
            ann.highlight_color = 'Yellow'
            ann.highlight_text = highlight_text
            ann.location = location
            ann.location_sort = "%05d" % int(
                re.match(r'^Page (?P<page>\d+).*$', location).group('page'))
            ann.note_text = note_text
            ann.last_modification = timestamp
            annotations[timestamp] = ann

            # Keep going only while another annotation header follows;
            # anything else ends the summary
            if index >= len(lines) or not header_re.match(lines[index]):
                break
    except Exception:
        # 'except Exception' rather than a bare 'except' so that
        # KeyboardInterrupt/SystemExit are not mistaken for a parse failure
        if log_failure:
            self._log(" unable to parse %s Annotations" % self.app_name)
            self._log("{:~^80}".format(" Imported Annotation summary "))
            self._log(raw)
            self._log("{:~^80}".format(" end imported Annotations summary "))
            import traceback
            traceback.print_exc()
            msg = ('Unable to parse Annotation summary from %s. ' % self.app_name +
                   'Paste entire contents of emailed summary.')
            MessageBox(MessageBox.WARNING,
                       'Error importing annotations',
                       msg,
                       show_copy_button=False,
                       parent=self.opts.gui).exec_()
            self._log_location("WARNING: %s" % msg)
        return False

    # Populate a BookStruct
    book_mi = BookStruct()
    book_mi.active = True
    book_mi.author = author
    book_mi.book_id = mi.id
    book_mi.title = title
    book_mi.uuid = None
    book_mi.last_update = time.mktime(time.localtime())
    book_mi.reader_app = self.app_name
    book_mi.cid = mi.id
    book_mi.annotations = len(annotations)

    # Add book to books_db
    self.add_to_books_db(self.books_db, book_mi)
    self.annotated_book_list.append(book_mi)

    # Add the annotations, oldest first. A successful parse always stores at
    # least one annotation, so timestamp is bound after the loop.
    for timestamp in sorted(annotations):
        self.add_to_annotations_db(self.annotations_db, annotations[timestamp])
        self.update_book_last_annotation(self.books_db, timestamp, mi.id)
        self.opts.pb.increment()
    self.update_book_last_annotation(self.books_db, timestamp, mi.id)

    # Update the timestamp
    self.update_timestamp(self.annotations_db)
    self.update_timestamp(self.books_db)
    self.commit()

    # Return True if successful
    return True
def parse_exported_highlights(self, raw):
    """
    Extract highlights from pasted Annotations summary, add them to selected
    book in calibre library.

    The highlights are read from self.highlights, which this method does not
    populate itself (presumably a mapping of timestamp -> dict of highlight
    properties filled in beforehand; verify against the caller). raw is
    accepted but not consulted, and title/author are fixed placeholders.

    Construct a BookStruct object with the book's metadata.
    Starred items are minimally required.
    BookStruct properties:
        *active: [True|False]
        *author: "John Smith"
         author_sort: (if known)
        *book_id: an int uniquely identifying the book.
                  Highlights are associated with books through book_id
         genre: "Fiction" (if known)
        *title: "The Story of John Smith"
         title_sort: "Story of John Smith, The" (if known)
         uuid: Calibre's uuid for this book, if known

    Construct an AnnotationStruct object with the highlight's metadata.
    Starred items are minimally required. Dashed items
    (highlight_text and note_text) may be one or both.
    AnnotationStruct properties:
         annotation_id: an int uniquely identifying the annotation
        *book_id: The book this annotation is associated with
         highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
        -highlight_text: A list of paragraphs constituting the highlight
         last_modification: The timestamp of the annotation
         location: location of highlight in the book
        -note_text: A list of paragraphs constituting the note
        *timestamp: Unique timestamp of highlight's creation/modification time

    Return True if successful.
    """
    self._log("%s:parse_exported_highlight()" % self.app_name)

    # Create the annotations, books table as needed
    self.annotations_db = "%s_imported_annotations" % self.app_name_
    self.create_annotations_table(self.annotations_db)
    self.books_db = "%s_imported_books" % self.app_name_
    self.create_books_table(self.books_db)

    self.annotated_book_list = []
    self.selected_books = None

    # Generate the book metadata from the selected book
    row = self.opts.gui.library_view.currentIndex()
    book_id = self.opts.gui.library_view.model().id(row)
    db = self.opts.gui.current_db
    mi = db.get_metadata(book_id, index_is_id=True)

    # Populate author, title at a minimum
    title = "A Book With Some Exported Annotations"
    author = "John Smith"

    # Populate a BookStruct
    book_mi = BookStruct()
    book_mi.active = True
    book_mi.author = author
    book_mi.book_id = mi.id
    book_mi.title = title
    book_mi.uuid = None
    book_mi.last_update = time.mktime(time.localtime())
    book_mi.reader_app = self.app_name
    book_mi.cid = mi.id
    book_mi.annotations = len(self.highlights)

    # Add annotations to the database, oldest first.
    # sorted() over the mapping works on Python 2 and 3; iterkeys() exists
    # only on Python 2 dicts.
    for timestamp in sorted(self.highlights):
        highlight = self.highlights[timestamp]
        book_mi.last_update = timestamp

        # Populate an AnnotationStruct
        ann_mi = AnnotationStruct()

        # Required items
        ann_mi.book_id = book_mi['book_id']
        ann_mi.last_modification = timestamp

        # Optional items
        if 'annotation_id' in highlight:
            ann_mi.annotation_id = highlight['annotation_id']
        if 'highlight_color' in highlight:
            ann_mi.highlight_color = highlight['highlight_color']
        if 'highlight_text' in highlight:
            # Paragraph list is stored as one newline-separated string
            ann_mi.highlight_text = '\n'.join(highlight['highlight_text'])
        if 'note_text' in highlight:
            ann_mi.note_text = '\n'.join(highlight['note_text'])

        # Add annotation to annotations_db
        self.add_to_annotations_db(self.annotations_db, ann_mi)

        # Increment the progress bar
        self.opts.pb.increment()

        # Update last_annotation in books_db
        self.update_book_last_annotation(self.books_db, timestamp, ann_mi.book_id)

    # Add book to books_db
    self.add_to_books_db(self.books_db, book_mi)
    self.annotated_book_list.append(book_mi)

    # Update the timestamp
    self.update_timestamp(self.annotations_db)
    self.update_timestamp(self.books_db)
    self.commit()

    # Return True if successful
    return True