def get_active_annotations(self):
    '''
    Copy the reader app's annotations into the cached annotations database.

    Nothing is fetched when caching is enabled and _cache_is_current()
    reports that the cache matches the localized source database.

    For each annotation, construct an AnnotationStruct object with the
    highlight's metadata. Starred items are minimally required. Dashed items
    (highlight_text and note_text) may be one or both.

    AnnotationStruct properties:
      annotation_id: an int uniquely identifying the annotation
      *book_id: The book this annotation is associated with
      highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
      -highlight_text: A list of paragraphs constituting the highlight
      last_modification: The timestamp of the annotation
      location: location of highlight in the book
      -note_text: A list of paragraphs constituting the note
      *timestamp: Unique timestamp of highlight's creation/modification time

    Returns None; results are written through add_to_annotations_db(),
    update_book_last_annotation(), update_timestamp() and commit().
    '''
    self._log("%s:get_active_annotations()" % self.app_name)

    self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
    self.opts.pb.set_value(0)

    db_profile = self._localize_database_path(self.app_id, self.annotations_subpath)
    self.annotations_db = db_profile['path']

    # Test timestamp against cached value
    cached_db = self.generate_annotations_db_name(self.app_name_, self.ios.device_name)
    books_db = self.generate_books_db_name(self.app_name_, self.ios.device_name)

    if not self.opts.disable_caching and self._cache_is_current(db_profile['stats'], cached_db):
        self._log(" retrieving cached annotations from %s" % cached_db)
        return

    self._log(" fetching annotations from %s on %s" % (self.app_name, self.ios.device_name))

    # Create the annotations table as needed
    self.create_annotations_table(cached_db)

    query = '''SELECT book_oid, last_access, book_annotations.location, book_annotations.book_position, note, book_annotations.oid as ba_oid FROM book_annotations JOIN book ON book.oid = book_annotations.book_oid ORDER BY book_annotations.book_position '''

    # "with con:" only scopes a transaction; the connection itself has to be
    # closed explicitly, so read everything up front and release it.
    con = sqlite3.connect(self.annotations_db)
    try:
        con.row_factory = sqlite3.Row
        cur = con.cursor()
        cur.execute(query)
        rows = cur.fetchall()
    finally:
        con.close()

    self.opts.pb.set_maximum(len(rows))
    annotations = {}
    latest_by_book = {}
    for row in rows:
        self.opts.pb.increment()
        # NOTE(review): column names are bytes literals. sqlite3.Row on
        # Python 3 accepts only int/str indices - confirm the target interpreter.
        book_id = row[b'book_oid']
        if book_id not in self.installed_books:
            continue

        # Annotations are quoted. Anything afterwards is a note.
        # Assuming that the user hasn't edited the opening/closing quotes,
        # we can assume that a sequence of '"\n' is a valid split point.
        full_annotation = row[b'note']
        highlight_text = None
        note_text = None
        if full_annotation.startswith('"') and full_annotation.endswith('"'):
            # Highlight only - strip opening/closing quotes
            highlight_text = full_annotation[1:-1]
        elif '"\n' in full_annotation:
            # Presumed to be a hybrid highlight/note, separated by closing
            # quote/LF. Split on the first separator only so that a note
            # which itself contains '"\n' is kept whole.
            quoted, _sep, note_text = full_annotation.partition('"\n')
            highlight_text = quoted[1:]
        else:
            # User manually removed the quotes, assume it's just a note
            note_text = full_annotation

        # Populate an AnnotationStruct
        a_mi = AnnotationStruct()
        a_mi.annotation_id = row[b'ba_oid']
        a_mi.book_id = book_id
        a_mi.epubcfi = row[b'location']
        a_mi.highlight_color = 'Yellow'
        # Empty strings are stored too, hence the explicit None checks
        if highlight_text is not None:
            a_mi.highlight_text = highlight_text
        if note_text is not None:
            a_mi.note_text = note_text

        section = self._get_spine_index(a_mi.epubcfi)
        try:
            a_mi.location = self.tocs[book_id]["%.0f" % (section)]
        except Exception:
            # No usable TOC entry for this section - fall back to a generic label
            a_mi.location = "Section %d" % section

        a_mi.location_sort = row[b'book_position']

        # Stanza doesn't timestamp individual annotations
        # Space them 1 second apart
        timestamp = row[b'last_access']
        while timestamp in annotations:
            timestamp += 1

        a_mi.last_modification = timestamp + self.NSTimeIntervalSince1970
        annotations[timestamp] = a_mi

        # Remember the newest timestamp stored for each book, so that
        # last_annotation is updated for the book an annotation actually
        # belongs to rather than for whichever row the query returned last.
        latest_by_book[book_id] = max(timestamp, latest_by_book.get(book_id, timestamp))

    for a_mi in annotations.values():
        self.add_to_annotations_db(cached_db, a_mi)

    # Update last_annotation in books_db
    # NOTE(review): the value passed here has no NSTimeIntervalSince1970
    # offset, unlike last_modification - confirm which epoch books_db expects.
    for book_id, timestamp in latest_by_book.items():
        self.update_book_last_annotation(books_db, timestamp, book_id)

    self.update_timestamp(cached_db)
    self.commit()
def get_active_annotations(self):
    '''
    Store every entry of self.active_annotations in the annotations database.

    self.active_annotations is read after calling _fetch_annotations()
    (presumably populated there - verify) and is treated as a mapping whose
    values are dicts. Each dict must provide 'book_id', 'location_sort' and
    'last_modification'; the remaining keys are optional.

    For each annotation, construct an AnnotationStruct object with the
    highlight's metadata. Starred items are minimally required. Dashed items
    (highlight_text and note_text) may be one or both.

    AnnotationStruct properties:
      annotation_id: an int uniquely identifying the annotation
      *book_id: The book this annotation is associated with
      highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
      -highlight_text: A list of paragraphs constituting the highlight
      last_modification: The timestamp of the annotation
      location: location of highlight in the book
      -note_text: A list of paragraphs constituting the note
      *timestamp: Unique timestamp of highlight's creation/modification time

    Returns None.
    '''
    self._log_location("Start!!!!")
    self._log("%s:get_active_annotations()" % self.app_name)

    self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
    self.opts.pb.set_value(0)

    annotations_db = self.generate_annotations_db_name(self.app_name_, self.opts.device_name)
    self.books_db = self.generate_books_db_name(self.app_name_, self.opts.device_name)

    # Create the annotations table
    self.create_annotations_table(annotations_db)

    self._fetch_annotations()

    # Initialize the progress bar
    self.opts.pb.set_label("Getting highlights from %s" % self.app_name)
    self.opts.pb.set_value(0)
    self.opts.pb.set_maximum(len(self.active_annotations))

    # Process in book / position / modification-time order
    ordered = sorted(
        self.active_annotations.values(),
        key=lambda k: (k['book_id'], k['location_sort'], k['last_modification']))

    # Add annotations to the database
    for annotation in ordered:
        # Populate an AnnotationStruct with available data
        ann_mi = AnnotationStruct()

        # Required items
        ann_mi.book_id = annotation['book_id']
        ann_mi.last_modification = annotation['last_modification']

        # Optional items with PB modifications
        if 'annotation_id' in annotation:
            ann_mi.annotation_id = annotation['annotation_id']
        if 'highlight_color' in annotation:
            # Only the cyan spellings map to Blue; every other source
            # colour (including 'yellow') is stored as Yellow.
            is_cyan = annotation['highlight_color'] in ('cian', 'cyan')
            ann_mi.highlight_color = 'Blue' if is_cyan else 'Yellow'
        if 'highlight_text' in annotation:
            ann_mi.highlight_text = annotation['highlight_text']
        if 'note_text' in annotation:
            ann_mi.note_text = annotation['note_text']
        if 'page' in annotation:
            ann_mi.location = annotation['page']
        if 'location_sort' in annotation:
            # Zero-padded to eight digits
            ann_mi.location_sort = "%08d" % annotation['location_sort']
        if 'epubcfi' in annotation:
            ann_mi.epubcfi = annotation['epubcfi']

        # Add annotation to annotations_db
        self.add_to_annotations_db(annotations_db, ann_mi)

        # Increment the progress bar
        self.opts.pb.increment()

        # Update last_annotation in books_db. Done per annotation so that
        # every book is updated and nothing is referenced when there are
        # no annotations at all.
        self.update_book_last_annotation(
            self.books_db, ann_mi.last_modification, ann_mi.book_id)

    # Update the timestamp
    self.update_timestamp(annotations_db)
    self.commit()
    self._log_location("Finish!!!!")
def get_active_annotations(self):
    """
    Fetch active iBooks annotations from AEAnnotation_*.sqlite

    Nothing is fetched when caching is enabled and _cache_is_current()
    reports that the cache matches the localized source database. Otherwise
    every non-deleted ZAEANNOTATION row of type 2 whose asset id is in
    self.installed_books is converted to an AnnotationStruct and stored
    through add_to_annotations_db(); last_annotation is updated per row.

    Returns None.
    """
    self._log("%s:get_active_annotations()" % self.app_name)

    self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
    self.opts.pb.set_value(0)

    db_profile = self._localize_database_path(self.app_id, self.annotations_subpath)
    self.annotations_db = db_profile['path']

    # Test timestamp against cached value
    cached_db = self.generate_annotations_db_name(self.app_name_, self.ios.device_name)
    books_db = self.generate_books_db_name(self.app_name_, self.ios.device_name)

    if not self.opts.disable_caching and self._cache_is_current(db_profile['stats'], cached_db):
        self._log(" retrieving cached annotations from %s" % cached_db)
        return

    self._log(" fetching annotations from %s on %s" % (self.app_name, self.ios.device_name))

    # Create the annotations table as needed
    self.create_annotations_table(cached_db)

    query = '''SELECT ZANNOTATIONASSETID, ZANNOTATIONLOCATION, ZANNOTATIONMODIFICATIONDATE, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONSTYLE, ZANNOTATIONUUID FROM ZAEANNOTATION WHERE ZANNOTATIONDELETED = 0 and ZANNOTATIONTYPE = 2 ORDER BY ZANNOTATIONMODIFICATIONDATE '''

    # "with con:" only scopes a transaction; the connection itself has to be
    # closed explicitly, so read everything up front and release it.
    con = sqlite3.connect(self.annotations_db)
    try:
        con.row_factory = sqlite3.Row
        cur = con.cursor()
        cur.execute(query)
        rows = cur.fetchall()
    finally:
        con.close()

    self.opts.pb.set_maximum(len(rows))
    for row in rows:
        self.opts.pb.increment()
        # NOTE(review): column names are bytes literals. sqlite3.Row on
        # Python 3 accepts only int/str indices - confirm the target interpreter.
        book_id = row[b'ZANNOTATIONASSETID']
        if book_id not in self.installed_books:
            continue

        # Collect the metadata

        # Sanitize text, note to unicode
        # A NULL selection is treated as empty text instead of crashing re.sub
        selected = row[b'ZANNOTATIONSELECTEDTEXT'] or ''
        highlight_text = re.sub('\xa0', ' ', selected)
        highlight_text = UnicodeDammit(highlight_text).unicode
        # Drop empty lines first, then trim; a whitespace-only line
        # therefore still survives as an empty entry.
        highlight_text = [line.strip()
                          for line in highlight_text.rstrip('\n').split('\n')
                          if line]

        note_text = None
        if row[b'ZANNOTATIONNOTE']:
            note_text = UnicodeDammit(row[b'ZANNOTATIONNOTE']).unicode
            # Only the first line of the note is kept
            note_text = note_text.rstrip('\n').split('\n')[0]

        modified = row[b'ZANNOTATIONMODIFICATIONDATE'] + self.NSTimeIntervalSince1970

        # Populate an AnnotationStruct
        a_mi = AnnotationStruct()
        a_mi.annotation_id = row[b'ZANNOTATIONUUID']
        a_mi.book_id = book_id
        a_mi.epubcfi = row[b'ZANNOTATIONLOCATION']
        a_mi.highlight_color = self.HIGHLIGHT_COLORS[row[b'ZANNOTATIONSTYLE']]
        a_mi.highlight_text = '\n'.join(highlight_text)
        a_mi.last_modification = modified

        # News clippings are sorted by modification time instead of position
        is_news = (self.collect_news_clippings and
                   'News' in self.get_genres(books_db, book_id))

        if a_mi.epubcfi:
            section = self._get_spine_index(a_mi.epubcfi)
            try:
                a_mi.location = self.tocs[book_id]["%.0f" % (section - 1)]
            except Exception:
                # No usable TOC entry for this section - fall back to a generic label
                a_mi.location = "Section %d" % section
            if is_news:
                a_mi.location_sort = a_mi.last_modification
            else:
                a_mi.location_sort = self._generate_location_sort(a_mi.epubcfi)
        elif is_news:
            # No location recorded: label the clipping with the book title
            a_mi.location = self.get_title(books_db, book_id)
            a_mi.location_sort = a_mi.last_modification

        a_mi.note_text = note_text

        # Add annotation
        self.add_to_annotations_db(cached_db, a_mi)

        # Update last_annotation in books_db
        self.update_book_last_annotation(books_db, modified, book_id)

    self.update_timestamp(cached_db)
    self.commit()
def get_active_annotations(self):
    """
    Fetch active iBooks annotations from AEAnnotation_*.sqlite

    Nothing is fetched when caching is enabled and _cache_is_current()
    reports that the cache matches the localized source database. Otherwise
    every non-deleted ZAEANNOTATION row of type 2 whose asset id is in
    self.installed_books is converted to an AnnotationStruct and stored
    through add_to_annotations_db(); last_annotation is updated per row.

    Returns None.
    """
    self._log("%s:get_active_annotations()" % self.app_name)

    self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
    self.opts.pb.set_value(0)

    db_profile = self._localize_database_path(self.app_id, self.annotations_subpath)
    self.annotations_db = db_profile['path']

    # Test timestamp against cached value
    cached_db = self.generate_annotations_db_name(self.app_name_, self.ios.device_name)
    books_db = self.generate_books_db_name(self.app_name_, self.ios.device_name)

    if not self.opts.disable_caching and self._cache_is_current(db_profile['stats'], cached_db):
        self._log(" retrieving cached annotations from %s" % cached_db)
        return

    self._log(" fetching annotations from %s on %s" % (self.app_name, self.ios.device_name))

    # Create the annotations table as needed
    self.create_annotations_table(cached_db)

    query = '''SELECT ZANNOTATIONASSETID, ZANNOTATIONLOCATION, ZANNOTATIONMODIFICATIONDATE, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONSTYLE, ZANNOTATIONUUID FROM ZAEANNOTATION WHERE ZANNOTATIONDELETED = 0 and ZANNOTATIONTYPE = 2 ORDER BY ZANNOTATIONMODIFICATIONDATE '''

    # "with con:" only scopes a transaction; the connection itself has to be
    # closed explicitly, so read everything up front and release it.
    con = sqlite3.connect(self.annotations_db)
    try:
        con.row_factory = sqlite3.Row
        cur = con.cursor()
        cur.execute(query)
        rows = cur.fetchall()
    finally:
        con.close()

    self.opts.pb.set_maximum(len(rows))
    for row in rows:
        self.opts.pb.increment()
        # NOTE(review): column names are bytes literals. sqlite3.Row on
        # Python 3 accepts only int/str indices - confirm the target interpreter.
        book_id = row[b'ZANNOTATIONASSETID']
        if book_id not in self.installed_books:
            continue

        # Collect the metadata

        # Sanitize text, note to unicode
        # A NULL selection is treated as empty text instead of crashing re.sub
        selected = row[b'ZANNOTATIONSELECTEDTEXT'] or ''
        highlight_text = re.sub('\xa0', ' ', selected)
        highlight_text = UnicodeDammit(highlight_text).unicode
        # Drop empty lines first, then trim; a whitespace-only line
        # therefore still survives as an empty entry.
        highlight_text = [line.strip()
                          for line in highlight_text.rstrip('\n').split('\n')
                          if line]

        note_text = None
        if row[b'ZANNOTATIONNOTE']:
            note_text = UnicodeDammit(row[b'ZANNOTATIONNOTE']).unicode
            # Only the first line of the note is kept
            note_text = note_text.rstrip('\n').split('\n')[0]

        modified = row[b'ZANNOTATIONMODIFICATIONDATE'] + self.NSTimeIntervalSince1970

        # Populate an AnnotationStruct
        a_mi = AnnotationStruct()
        a_mi.annotation_id = row[b'ZANNOTATIONUUID']
        a_mi.book_id = book_id
        a_mi.epubcfi = row[b'ZANNOTATIONLOCATION']
        a_mi.highlight_color = self.HIGHLIGHT_COLORS[row[b'ZANNOTATIONSTYLE']]
        a_mi.highlight_text = '\n'.join(highlight_text)
        a_mi.last_modification = modified

        # News clippings are sorted by modification time instead of position
        is_news = (self.collect_news_clippings and
                   'News' in self.get_genres(books_db, book_id))

        if a_mi.epubcfi:
            section = self._get_spine_index(a_mi.epubcfi)
            try:
                a_mi.location = self.tocs[book_id]["%.0f" % (section - 1)]
            except Exception:
                # No usable TOC entry for this section - fall back to a generic label
                a_mi.location = "Section %d" % section
            if is_news:
                a_mi.location_sort = a_mi.last_modification
            else:
                a_mi.location_sort = self._generate_location_sort(a_mi.epubcfi)
        elif is_news:
            # No location recorded: label the clipping with the book title
            a_mi.location = self.get_title(books_db, book_id)
            a_mi.location_sort = a_mi.last_modification

        a_mi.note_text = note_text

        # Add annotation
        self.add_to_annotations_db(cached_db, a_mi)

        # Update last_annotation in books_db
        self.update_book_last_annotation(books_db, modified, book_id)

    self.update_timestamp(cached_db)
    self.commit()