コード例 #1
0
    def get_active_annotations(self):
        '''
        Store a fixed set of sample annotations in the annotations database.

        For each annotation, construct an AnnotationStruct object with the
        highlight's metadata. Starred items are minimally required. Dashed items
        (highlight_text and note_text) may be one or both.
          AnnotationStruct properties:
            annotation_id: an int uniquely identifying the annotation
           *book_id: The book this annotation is associated with
            highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
           -highlight_text: A list of paragraphs constituting the highlight
            last_modification: The timestamp of the annotation
            location: location of highlight in the book
           -note_text: A list of paragraphs constituting the note
           *timestamp: Unique timestamp of highlight's creation/modification time

        Here the timestamp is the key of the sample dict (a time.mktime()
        value); it is stored on the struct as last_modification and is also
        passed to update_book_last_annotation(). Paragraph lists are joined
        with newlines before being stored.
        '''
        # Sample annotations. Note that annotations may have highlight_text,
        # note_text, or both.
        samples = [
            (datetime.datetime(2012, 12, 4, 8, 15, 0), {
                'book_id': 1,
                'highlight_color': 'Gray',
                'highlight_text': [
                    'The first paragraph of the first highlight.',
                    'The second paragaph of the first highlight.'
                ],
            }),
            (datetime.datetime(2012, 12, 4, 8, 16, 0), {
                'book_id': 1,
                'highlight_color': 'Gray',
                'highlight_text': [
                    'The first paragraph of the second highlight.',
                    'The second paragaph of the second highlight.'
                ],
                'note_text': ['A note added to the second highlight']
            }),
            (datetime.datetime(2012, 12, 4, 8, 17, 0), {
                'book_id': 1,
                'highlight_color': 'Gray',
                'note_text': ['A note added to the third highlight']
            }),
            (datetime.datetime(2012, 12, 10, 9, 0, 0), {
                'book_id': 2,
                'highlight_color': 'Gray',
                'highlight_text': [
                    'The first paragraph of the first highlight.',
                    'The second paragaph of the first highlight.'
                ]
            }),
            (datetime.datetime(2012, 12, 10, 9, 1, 0), {
                'book_id': 2,
                'highlight_color': 'Gray',
                'highlight_text': [
                    'The first paragraph of the second highlight.',
                    'The second paragaph of the second highlight.'
                ],
                'note_text': ['A note added to the second highlight']
            }),
            (datetime.datetime(2012, 12, 10, 9, 2, 0), {
                'book_id': 2,
                'highlight_color': 'Gray',
                'note_text': ['A note added to the third highlight']
            }),
            (datetime.datetime(2012, 12, 31, 23, 59, 0), {
                'book_id': 999,
                'highlight_color': 'Gray',
                'highlight_text': ['An orphan annotation (no book)']
            }),
        ]

        # Index the samples by their local-time epoch timestamp
        dict_of_anns = {}
        for created, ann in samples:
            dict_of_anns[time.mktime(created.timetuple())] = ann

        self._log("%s:get_active_annotations()" % self.app_name)

        self.opts.pb.set_label("Getting active annotations for %s" %
                               self.app_name)
        self.opts.pb.set_value(0)

        annotations_db = self.generate_annotations_db_name(
            self.app_name_, self.opts.device_name)
        books_db = self.generate_books_db_name(self.app_name_,
                                               self.opts.device_name)

        # Create the annotations table
        self.create_annotations_table(annotations_db)

        # Initialize the progress bar
        self.opts.pb.set_label("Getting highlights from %s" % self.app_name)
        self.opts.pb.set_value(0)
        self.opts.pb.set_maximum(len(dict_of_anns))

        # Add annotations to the database, oldest first.
        # sorted(dict) iterates the keys on both Python 2 and Python 3;
        # dict.iterkeys() no longer exists on Python 3.
        for timestamp in sorted(dict_of_anns):
            ann = dict_of_anns[timestamp]

            # Populate an AnnotationStruct with available data
            ann_mi = AnnotationStruct()

            # Required items
            ann_mi.book_id = ann['book_id']
            ann_mi.last_modification = timestamp

            # Optional items
            if 'annotation_id' in ann:
                ann_mi.annotation_id = ann['annotation_id']
            if 'highlight_color' in ann:
                ann_mi.highlight_color = ann['highlight_color']
            if 'highlight_text' in ann:
                ann_mi.highlight_text = '\n'.join(ann['highlight_text'])
            if 'note_text' in ann:
                ann_mi.note_text = '\n'.join(ann['note_text'])

            # Add annotation to annotations_db
            self.add_to_annotations_db(annotations_db, ann_mi)

            # Increment the progress bar
            self.opts.pb.increment()

            # Update last_annotation in books_db
            self.update_book_last_annotation(books_db, timestamp,
                                             ann_mi.book_id)

        # Update the timestamp
        self.update_timestamp(annotations_db)
        self.commit()
コード例 #2
0
    def get_active_annotations(self):
        '''
        Copy the active highlights from the reader's Highlights table into the
        cached annotations table.

        The reader database is only re-read when caching is disabled or
        _cache_is_current() reports the cache as stale; otherwise the method
        just logs that cached annotations are being used.

        Rows are skipped when the book is not in self.installed_books, when
        the row is flagged Deleted, when both the highlight and the note are
        empty, or when StartXPath cannot be turned into a location sort key.
        '''
        self._log("%s:get_active_annotations()" % self.app_name)

        self.opts.pb.set_label("Getting active annotations for %s" %
                               self.app_name)
        self.opts.pb.set_value(0)

        db_profile = self._localize_database_path(self.app_id,
                                                  self.annotations_subpath)
        self.annotations_db = db_profile['path']

        # Test timestamp against cached value
        cached_db = self.generate_annotations_db_name(self.app_name_,
                                                      self.ios.device_name)
        books_db = self.generate_books_db_name(self.app_name_,
                                               self.ios.device_name)

        if not self.opts.disable_caching and self._cache_is_current(
                db_profile['stats'], cached_db):
            self._log(" retrieving cached annotations from %s" % cached_db)
            return

        self._log(" fetching annotations from %s on %s" %
                  (self.app_name, self.ios.device_name))

        # Create the annotations table as needed
        self.create_annotations_table(cached_db)

        con = sqlite3.connect(self.annotations_db)
        try:
            # 'with con' only scopes the transaction; the connection itself
            # is released by the finally clause below.
            with con:
                con.row_factory = sqlite3.Row
                cur = con.cursor()
                cur.execute('''SELECT * FROM Highlights
                               ORDER BY NoteDateTime
                            ''')
                rows = cur.fetchall()
                self.opts.pb.set_maximum(len(rows))
                for row in rows:
                    self.opts.pb.increment()

                    # Ignore bookmarks for books that are not installed
                    book_id = row[b'BookID']
                    if book_id not in self.installed_books:
                        continue

                    # Ignore bookmarks flagged as deleted
                    if row[b'Deleted'] == 1:
                        continue

                    this_is_news = self.collect_news_clippings and 'News' in self.get_genres(
                        books_db, book_id)

                    # Sanitize text, note to unicode. Strip each line before
                    # filtering so that whitespace-only lines are dropped
                    # instead of surviving as empty strings.
                    highlight_text = re.sub('\xa0', ' ', row[b'Text'])
                    highlight_text = UnicodeDammit(highlight_text).unicode
                    highlight_text = [
                        line.strip() for line in highlight_text.split('\n')
                        if line.strip()
                    ]

                    note_text = None
                    if row[b'Note']:
                        ntu = UnicodeDammit(row[b'Note']).unicode
                        note_text = ntu.rstrip('\n')

                    # Populate an AnnotationStruct
                    a_mi = AnnotationStruct()
                    a_mi.annotation_id = row[b'UUID']
                    a_mi.book_id = book_id
                    a_mi.highlight_color = self.HIGHLIGHT_COLORS[
                        row[b'Colour']]
                    a_mi.highlight_text = '\n'.join(highlight_text)
                    a_mi.last_modification = row[b'NoteDateTime']

                    # The TOC lookup uses Section - 1 as a string key; fall
                    # back to a generic label when no entry is available.
                    section = str(int(row[b'Section']) - 1)
                    try:
                        a_mi.location = self.tocs[book_id][section]
                    except Exception:
                        if this_is_news:
                            a_mi.location = self.get_title(books_db, book_id)
                        else:
                            a_mi.location = "Section %s" % row[b'Section']

                    a_mi.note_text = note_text

                    # If empty highlight_text and empty note_text, not a useful annotation
                    if not highlight_text and not note_text:
                        continue

                    # Generate location_sort
                    if this_is_news:
                        a_mi.location_sort = row[b'NoteDateTime']
                    else:
                        interior = self._generate_interior_location_sort(
                            row[b'StartXPath'])
                        if not interior:
                            self._log("Marvin: unable to parse xpath:")
                            self._log(row[b'StartXPath'])
                            self._log(a_mi)
                            continue

                        a_mi.location_sort = "%04d.%s.%04d" % (int(
                            row[b'Section']), interior, int(
                                row[b'StartOffset']))

                    # Add annotation
                    self.add_to_annotations_db(cached_db, a_mi)

                    # Update last_annotation in books_db
                    self.update_book_last_annotation(books_db,
                                                     row[b'NoteDateTime'],
                                                     book_id)

                # Update the timestamp
                self.update_timestamp(cached_db)
                self.commit()
        finally:
            con.close()
コード例 #3
0
        def _process_individual_book(book):
            '''
            Add one book element and its active highlights to the databases.

            book is queried with .get() for attributes and .find() for the
            'subjects' and 'highlights' children. The enclosing scope supplies
            self and xl; xl maps attribute names on book to BookStruct field
            names.

            Returns None. News books are ignored entirely (no timestamp update,
            no commit) when self.collect_news_clippings is false.
            '''
            book_mi = BookStruct()
            book_mi['reader_app'] = self.app_name
            book_mi['cid'] = None
            for md in xl:
                book_mi[xl[md]] = book.get(md)
            book_mi['active'] = True
            book_mi['annotations'] = 0
            subjects = book.find('subjects')
            if subjects is not None:
                book_mi['genre'] = ', '.join([s.text for s in subjects])

            # NOTE(review): when there is no 'subjects' child, 'genre' is
            # whatever xl/BookStruct provided -- confirm it is never None here.
            this_is_news = False
            if 'News' in book_mi['genre']:
                if not self.collect_news_clippings:
                    return
                this_is_news = True

            # A book without a 'highlights' child is treated as having none
            hls = book.find('highlights')
            if hls is None:
                hls = []

            # Get the last update, count active annotations. Timestamps arrive
            # as strings, so compare them numerically rather than against the
            # initial int 0.
            last_update = 0.0
            for hl in hls:
                this_ts = hl.get('datetime')
                if this_ts is not None:
                    last_update = max(last_update, float(this_ts))
                if hl.get('deleted') == '0':
                    book_mi['annotations'] += 1
            book_mi['last_update'] = last_update

            # Get the library cid, confidence
            toc_entries = None
            if this_is_news:
                cid = self.news_clippings_cid
                confidence = 5
            else:
                cid, confidence = self.parent.generate_confidence(book_mi)
                if confidence >= 2:
                    toc_entries = self._get_epub_toc(cid=cid)

            # Add annotated book to the db, master_list
            if len(hls):
                self.add_to_books_db(self.books_db, book_mi)
                self.annotated_book_list.append(book_mi)

                # Collect the active annotations for this book, keyed by
                # their 'datetime' attribute
                highlights = {}
                for hl in hls:
                    if hl.get('deleted') == '1':
                        continue
                    highlights[hl.get('datetime')] = {
                        md: hl.get(md)
                        for md in ('text', 'note', 'color', 'key', 'deleted',
                                   'section', 'startx', 'startoffset')
                    }

                for hl_ts in sorted(highlights):
                    details = highlights[hl_ts]
                    highlight_text = details['text']
                    note_text = details['note']

                    # Populate an AnnotationStruct
                    a_mi = AnnotationStruct()
                    a_mi.annotation_id = details['key']
                    a_mi.book_id = book_mi['book_id']
                    a_mi.highlight_color = self.HIGHLIGHT_COLORS[int(
                        details['color'])]
                    a_mi.highlight_text = highlight_text
                    a_mi.last_modification = hl_ts
                    try:
                        section = str(int(details['section']) - 1)
                        a_mi.location = toc_entries[section]
                    except Exception:
                        # No TOC (toc_entries is None), no matching entry, or
                        # an unparseable section: fall back to a generic label
                        if this_is_news:
                            a_mi.location = book_mi['title']
                        else:
                            a_mi.location = "Section %s" % details['section']
                    a_mi.note_text = note_text

                    # If empty highlight_text and empty note_text, not a useful
                    # annotation. Either attribute may be missing (None).
                    if (not (highlight_text or '').strip()
                            and not (note_text or '').strip()):
                        continue

                    # Generate location_sort
                    if this_is_news:
                        a_mi.location_sort = hl_ts
                    else:
                        interior = self._generate_interior_location_sort(
                            details['startx'])
                        if not interior:
                            self._log("Marvin: unable to parse xpath:")
                            self._log(" %s" % details['startx'])
                            self._log(a_mi)
                            continue

                        a_mi.location_sort = "%04d.%s.%04d" % (
                            int(details['section']), interior,
                            int(details['startoffset']))

                    self.add_to_annotations_db(self.annotations_db, a_mi)
                    self.update_book_last_annotation(self.books_db, hl_ts,
                                                     book_mi['book_id'])

            # Update the timestamps
            self.update_timestamp(self.annotations_db)
            self.update_timestamp(self.books_db)
            self.commit()
コード例 #4
0
    def parse_exported_highlights(self, raw):
        """
        Import exported highlights for the book currently selected in the
        calibre library view.

        The annotations are read from self.highlights, a mapping of timestamp
        to a dict of annotation fields. One BookStruct is built for the
        selected book and one AnnotationStruct per entry in self.highlights.

           BookStruct properties (starred items are minimally required):
            *active: [True|False]
            *author: "John Smith"
             author_sort: (if known)
            *book_id: an int uniquely identifying the book.
                     Highlights are associated with books through book_id
             genre: "Fiction" (if known)
            *title: "The Story of John Smith"
             title_sort: "Story of John Smith, The" (if known)
             uuid: Calibre's uuid for this book, if known

          AnnotationStruct properties (starred items are minimally required;
          dashed items may be one or both):
            annotation_id: an int uniquely identifying the annotation
           *book_id: The book this annotation is associated with
            highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
           -highlight_text: A list of paragraphs constituting the highlight
            last_modification: The timestamp of the annotation
            location: location of highlight in the book
           -note_text: A list of paragraphs constituting the note
           *timestamp: Unique timestamp of highlight's creation/modification time

        Always returns True.
        """
        self._log("%s:parse_exported_highlight()" % self.app_name)

        # Make sure the import tables for annotations and books exist
        self.annotations_db = "%s_imported_annotations" % self.app_name_
        self.create_annotations_table(self.annotations_db)
        self.books_db = "%s_imported_books" % self.app_name_
        self.create_books_table(self.books_db)

        self.annotated_book_list = []
        self.selected_books = None

        # Look up the metadata of the book selected in the library view
        gui = self.opts.gui
        current_index = gui.library_view.currentIndex()
        selected_id = gui.library_view.model().id(current_index)
        mi = gui.current_db.get_metadata(selected_id, index_is_id=True)

        # Describe the book; title and author are the minimum needed
        book_mi = BookStruct()
        book_mi.active = True
        book_mi.author = "John Smith"
        book_mi.book_id = mi.id
        book_mi.title = "A Book With Some Exported Annotations"
        book_mi.uuid = None
        book_mi.last_update = time.mktime(time.localtime())
        book_mi.reader_app = self.app_name
        book_mi.cid = mi.id
        book_mi.annotations = len(self.highlights)

        # Store every highlight, oldest first
        for timestamp in sorted(self.highlights):
            entry = self.highlights[timestamp]
            book_mi.last_update = timestamp

            ann_mi = AnnotationStruct()

            # Required fields
            ann_mi.book_id = book_mi['book_id']
            ann_mi.last_modification = timestamp

            # Optional fields copied as-is
            for field in ('annotation_id', 'highlight_color'):
                if field in entry:
                    setattr(ann_mi, field, entry[field])

            # Optional paragraph lists, flattened to newline-separated text
            for field in ('highlight_text', 'note_text'):
                if field in entry:
                    setattr(ann_mi, field, '\n'.join(entry[field]))

            self.add_to_annotations_db(self.annotations_db, ann_mi)

            # Advance the progress bar
            self.opts.pb.increment()

            # Record this annotation as the book's most recent one
            self.update_book_last_annotation(self.books_db, timestamp,
                                             ann_mi.book_id)

        # Register the book itself
        self.add_to_books_db(self.books_db, book_mi)
        self.annotated_book_list.append(book_mi)

        # Stamp both tables and persist
        self.update_timestamp(self.annotations_db)
        self.update_timestamp(self.books_db)
        self.commit()

        return True
コード例 #5
0
    def get_active_annotations(self):
        '''
        Store the annotations collected in self.active_annotations in the
        annotations database.

        self.active_annotations is reset to an empty dict and then
        _parse_my_clippings() is called before the dict is read; it is keyed
        by timestamp, each value being a dict of annotation fields.

        For each annotation, construct an AnnotationStruct object with the
        highlight's metadata. Starred items are minimally required. Dashed items
        (highlight_text and note_text) may be one or both.
          AnnotationStruct properties:
            annotation_id: an int uniquely identifying the annotation
           *book_id: The book this annotation is associated with
            highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
           -highlight_text: A list of paragraphs constituting the highlight
            last_modification: The timestamp of the annotation
            location: location of highlight in the book
           -note_text: A list of paragraphs constituting the note
           *timestamp: Unique timestamp of highlight's creation/modification time

        For books whose genres include 'News' (when collect_news_clippings is
        set) the location is the book title and location_sort the timestamp.
        '''
        self._log("%s:get_active_annotations()" % self.app_name)

        self.active_annotations = {}

        self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
        self.opts.pb.set_value(0)

        # Don't change the template of the _db strings
        self.annotations_db = self.generate_annotations_db_name(self.app_name_, self.opts.device_name)
        self.books_db = self.generate_books_db_name(self.app_name_, self.opts.device_name)

        # Create the annotations table
        self.create_annotations_table(self.annotations_db)

        # Parse MyClippings.txt for entries matching installed_books
        self._parse_my_clippings()

        # Initialize the progress bar
        self.opts.pb.set_label("Getting highlights from %s" % self.app_name)
        self.opts.pb.set_value(0)
        self.opts.pb.show()
        try:
            self.opts.pb.set_maximum(len(self.active_annotations))

            # Add annotations to the database, oldest first.
            # sorted(dict) iterates the keys on both Python 2 and Python 3;
            # dict.iterkeys() no longer exists on Python 3.
            for timestamp in sorted(self.active_annotations):
                ann = self.active_annotations[timestamp]

                # Populate an AnnotationStruct with available data
                ann_mi = AnnotationStruct()

                # Required items
                ann_mi.book_id = ann['book_id']
                ann_mi.last_modification = timestamp

                this_is_news = self.collect_news_clippings and 'News' in self.get_genres(self.books_db, ann_mi.book_id)

                # Optional items
                if 'annotation_id' in ann:
                    ann_mi.annotation_id = ann['annotation_id']
                if 'highlight_color' in ann:
                    ann_mi.highlight_color = ann['highlight_color']
                if 'highlight_text' in ann:
                    ann_mi.highlight_text = '\n'.join(ann['highlight_text'])
                if this_is_news:
                    ann_mi.location = self.get_title(self.books_db, ann_mi.book_id)
                    ann_mi.location_sort = timestamp
                else:
                    if 'location' in ann:
                        ann_mi.location = ann['location']
                    if 'location_sort' in ann:
                        ann_mi.location_sort = ann['location_sort']
                if 'note_text' in ann:
                    ann_mi.note_text = '\n'.join(ann['note_text'])

                # Add annotation to self.annotations_db
                self.add_to_annotations_db(self.annotations_db, ann_mi)

                # Increment the progress bar
                self.opts.pb.increment()

                # Update last_annotation in self.books_db
                self.update_book_last_annotation(self.books_db, timestamp, ann_mi.book_id)
        finally:
            # Hide the progress bar even if storing an annotation failed
            self.opts.pb.hide()

        # Update the timestamp
        self.update_timestamp(self.annotations_db)
        self.commit()
コード例 #6
0
ファイル: _Stanza.py プロジェクト: wold5/calibre-annotations
    def get_active_annotations(self):
        '''
        Copy the annotations from the reader's book_annotations table into the
        cached annotations table.

        The reader database is only re-read when caching is disabled or
        _cache_is_current() reports the cache as stale; otherwise the method
        just logs that cached annotations are being used.

        For each annotation, construct an AnnotationStruct object with the
        highlight's metadata. Starred items are minimally required. Dashed items
        (highlight_text and note_text) may be one or both.
          AnnotationStruct properties:
            annotation_id: an int uniquely identifying the annotation
           *book_id: The book this annotation is associated with
            highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
           -highlight_text: A list of paragraphs constituting the highlight
            last_modification: The timestamp of the annotation
            location: location of highlight in the book
           -note_text: A list of paragraphs constituting the note
           *timestamp: Unique timestamp of highlight's creation/modification time

        Rows for books not in self.installed_books, and rows without a note
        value, are skipped. The cache timestamp is updated and committed only
        when at least one annotation was stored.
        '''
        self._log("%s:get_active_annotations()" % self.app_name)

        self.opts.pb.set_label("Getting active annotations for %s" %
                               self.app_name)
        self.opts.pb.set_value(0)

        db_profile = self._localize_database_path(self.app_id,
                                                  self.annotations_subpath)
        self.annotations_db = db_profile['path']

        # Test timestamp against cached value
        cached_db = self.generate_annotations_db_name(self.app_name_,
                                                      self.ios.device_name)
        books_db = self.generate_books_db_name(self.app_name_,
                                               self.ios.device_name)

        if not self.opts.disable_caching and self._cache_is_current(
                db_profile['stats'], cached_db):
            self._log(" retrieving cached annotations from %s" % cached_db)
            return

        self._log(" fetching annotations from %s on %s" %
                  (self.app_name, self.ios.device_name))

        # Create the annotations table as needed
        self.create_annotations_table(cached_db)

        con = sqlite3.connect(self.annotations_db)
        try:
            # 'with con' only scopes the transaction; the connection itself
            # is released by the finally clause below.
            with con:
                con.row_factory = sqlite3.Row
                cur = con.cursor()
                cur.execute('''SELECT
                                book_oid,
                                last_access,
                                book_annotations.location,
                                book_annotations.book_position,
                                note,
                                book_annotations.oid as ba_oid
                               FROM book_annotations
                               JOIN book ON book.oid = book_annotations.book_oid
                               ORDER BY book_annotations.book_position
                            ''')
                rows = cur.fetchall()
                self.opts.pb.set_maximum(len(rows))
                annotations = {}
                for row in rows:
                    self.opts.pb.increment()
                    book_id = row[b'book_oid']
                    if book_id not in self.installed_books:
                        continue

                    # Nothing to import from a row without annotation text
                    full_annotation = row[b'note']
                    if full_annotation is None:
                        continue

                    # Annotations are quoted. Anything afterwards is a note.
                    # Assuming that the user hasn't edited the opening/closing quotes,
                    # we can assume that a sequence of '"\n' is a valid split point.
                    highlight_text = None
                    note_text = None
                    if full_annotation.startswith(
                            '"') and full_annotation.endswith('"'):
                        # Highlight only - strip opening/closing quotes
                        highlight_text = [full_annotation[1:-1]]
                    elif '"\n' in full_annotation:
                        # Presumed to be a hybrid highlight/note, separated by
                        # closing quote/LF. Split at the first separator only
                        # so a note that itself contains '"\n' is kept whole.
                        quoted, _, remainder = full_annotation.partition('"\n')
                        highlight_text = [quoted[1:]]
                        note_text = remainder.split('\n')
                    else:
                        # User manually removed the quotes, assume it's just a note
                        note_text = full_annotation.split('\n')

                    # Populate an AnnotationStruct
                    a_mi = AnnotationStruct()
                    a_mi.annotation_id = row[b'ba_oid']
                    a_mi.book_id = book_id
                    a_mi.epubcfi = row[b'location']
                    a_mi.highlight_color = 'Yellow'
                    if highlight_text:
                        a_mi.highlight_text = '\n'.join(highlight_text)
                    if note_text:
                        a_mi.note_text = '\n'.join(note_text)

                    section = self._get_spine_index(a_mi.epubcfi)
                    try:
                        a_mi.location = self.tocs[book_id]["%.0f" % (section)]
                    except Exception:
                        a_mi.location = "Section %d" % section
                    a_mi.location_sort = row[b'book_position']

                    # Stanza doesn't timestamp individual annotations
                    # Space them 1 second apart
                    timestamp = row[b'last_access']
                    while timestamp in annotations:
                        timestamp += 1
                    a_mi.last_modification = timestamp + self.NSTimeIntervalSince1970
                    annotations[timestamp] = a_mi

                # Store the annotations oldest first and update last_annotation
                # for the book each one belongs to, so every annotated book is
                # updated and ends up with its most recent timestamp.
                # NOTE(review): the value passed here is the raw last_access
                # based key, without the NSTimeIntervalSince1970 offset that
                # last_modification carries -- confirm which one books_db
                # expects.
                for timestamp in sorted(annotations):
                    a_mi = annotations[timestamp]
                    self.add_to_annotations_db(cached_db, a_mi)
                    self.update_book_last_annotation(books_db, timestamp,
                                                     a_mi.book_id)

                if annotations:
                    self.update_timestamp(cached_db)
                    self.commit()
        finally:
            con.close()
コード例 #7
0
    def parse_exported_highlights(self, raw, log_failure=True):
        """
        Extract highlights from a pasted GoodReader Annotation summary email
        and store them in the imported annotations/books tables.

        The parser expects raw to begin with a 'File: <title>' line, followed
        by '--- Page N ---' sections. Within a section, a line starting with
        one of self.ANNOTATION_TYPES opens an annotation, a line starting with
        one of self.SKIP_TYPES is skipped up to the next annotation type, and
        any other non-empty line is appended to the current annotation's text.
        Parsing stops at the '(report generated by GoodReader)' footer.

        raw: the pasted summary text.
        log_failure: when True, a failed parse also logs the raw summary and
            shows a warning MessageBox.

        Return True if no problems.
        Return False if any exception was raised while parsing. NOTE: most
        index steps read gr_annotations[i] without a bounds check, so a
        summary that runs out of lines before the footer generally ends in an
        IndexError, which is reported as a parse failure.
        Exceptions raised after parsing (while storing results) propagate.
        """
        # Create the annotations, books table as needed
        self.annotations_db = "%s_imported_annotations" % self.app_name_
        self.create_annotations_table(self.annotations_db)
        self.books_db = "%s_imported_books" % self.app_name_
        self.create_books_table(self.books_db)

        self.annotated_book_list = []
        self.selected_books = None

        self._log("raw highlights: {0}".format(raw))

        # Generate the book metadata from the selected book
        row = self.opts.gui.library_view.currentIndex()
        book_id = self.opts.gui.library_view.model().id(row)
        db = self.opts.gui.current_db
        mi = db.get_metadata(book_id, index_is_id=True)

        page_marker = '--- Page'
        report_footer = '(report generated by GoodReader)'

        try:
            # Grab the title from the front of raw
            title = re.match(r'(?m)File: (?P<title>.*)$', raw).group('title')
            self._log("title='{0}'".format(title))

            # Populate a BookStruct
            book_mi = BookStruct()
            book_mi.active = True
            book_mi.author = 'Unknown'
            book_mi.book_id = mi.id
            book_mi.title = title
            book_mi.uuid = None
            book_mi.last_update = time.mktime(time.localtime())
            book_mi.reader_app = self.app_name
            book_mi.cid = mi.id

            # The line-prefix patterns do not change while parsing, so build
            # them once instead of on every loop iteration.
            any_type_re = re.compile('^(?P<ann_type>{0})'.format(
                '|'.join(self.ANNOTATION_TYPES + self.SKIP_TYPES)))
            ann_type_re = re.compile('^(?P<ann_type>{0})'.format(
                '|'.join(self.ANNOTATION_TYPES)))
            page_re = re.compile(r'--- (Page \w+) ---')

            gr_annotations = raw.split('\n')
            num_lines = len(gr_annotations)
            highlights = {}

            # Find the first annotation
            i = 0
            line = gr_annotations[i]
            self._log("Looking for Page: Line number={0} line='{1}'".format(
                i, line))
            while not line.startswith(page_marker):
                # Skipping preamble lines is normal, not a parse failure
                i += 1
                line = gr_annotations[i]
                self._log(
                    "Looking for Page: Line number={0} line='{1}'".format(
                        i, line))

            while i < num_lines and not line.startswith(report_footer):
                # Extract the page number
                page_num = page_re.search(line)
                self._log("regex result: page_num={0}".format(page_num))
                if page_num:
                    page_num = page_num.group(1)
                    self._log("page_num={0}".format(page_num))

                    # Extract the highlight
                    i += 1
                    line = gr_annotations[i]
                    self._log(
                        "Looking for annotation start: Line number={0} line='{1}'"
                        .format(i, line))

                    # Advance to the first line of this page that opens a
                    # real (non-skipped) annotation
                    prefix = None
                    while True:
                        prefix = any_type_re.search(line)
                        self._log("Searched for prefix={0}".format(prefix))
                        if prefix and prefix.group(
                                'ann_type') in self.SKIP_TYPES:
                            i += 1
                            line = gr_annotations[i]
                            self._log(
                                "Looking for annotation start: Line number={0} line='{1}'"
                                .format(i, line))
                            while not ann_type_re.search(line):
                                i += 1
                                line = gr_annotations[i]
                                self._log(
                                    "Looking for annotation start after a SKIP type: Line number={0} line='{1}'"
                                    .format(i, line))
                            continue
                        elif prefix:
                            self._log(
                                "Have annotation start: Line number={0} line='{1}' prefix={2}"
                                .format(i, line, prefix))
                            break
                        else:
                            i += 1
                            line = gr_annotations[i]
                            self._log(
                                "Looking for annotation start 2: Line number={0} line='{1}'"
                                .format(i, line))

                    annotation = self._extract_highlight(
                        line, prefix.group('ann_type'))
                    annotation.page_num = page_num
                    self._log(
                        "Started annotation: page_num={0} annotation='{1}'".
                        format(page_num, annotation))

                    # Get the annotation(s)
                    i += 1
                    line = gr_annotations[i]
                    self._log(
                        "Reading annotation text 1: Line number={0} line='{1}'"
                        .format(i, line))
                    ann = ''
                    while i < num_lines \
                        and not line.startswith(page_marker) \
                        and not line.startswith(report_footer):

                        if line:
                            prefix = any_type_re.search(line)
                            if prefix and prefix.group(
                                    'ann_type') in self.SKIP_TYPES:
                                # Continue until next ann_type
                                i += 1
                                line = gr_annotations[i]
                                while not ann_type_re.search(line):
                                    i += 1
                                    if i == num_lines:
                                        break
                                    line = gr_annotations[i]
                                continue
                            elif prefix:
                                # Additional highlight on the same page
                                # write current annotation, start new annotation
                                self._store_annotation(highlights, annotation)
                                annotation = self._extract_highlight(
                                    line, prefix.group('ann_type'))
                                annotation.page_num = page_num
                                annotation.ann_type = prefix.group('ann_type')
                                ann = ''
                                i += 1
                                line = gr_annotations[i]
                                continue

                            # Plain text line: accumulate into the annotation
                            if not ann:
                                ann = line
                            else:
                                ann += '\n' + line
                        i += 1
                        line = gr_annotations[i]
                        annotation.ann = ann

                    # Back up so that the next line is '--- Page' or '(report generated'
                    i -= 1
                    self._store_annotation(highlights, annotation)

                i += 1
                if i == num_lines:
                    break
                line = gr_annotations[i]
        except Exception as e:
            import traceback
            self._log("Exception parsing GoodReader Annotation summary: %s" %
                      e)
            # Print the traceback once, whether or not log_failure is set
            traceback.print_exc()
            if log_failure:
                self._log(" unable to parse GoodReader Annotation summary")
                self._log("{:~^80}".format(" Imported Annotation summary "))
                self._log(raw)
                self._log(
                    "{:~^80}".format(" end imported Annotations summary "))
                msg = ('Unable to parse Annotation summary from %s. ' %
                       self.app_name +
                       'Paste entire contents of emailed summary.')
                MessageBox(MessageBox.WARNING,
                           'Error importing annotations',
                           msg,
                           show_copy_button=False,
                           parent=self.opts.gui).exec_()
                self._log_location("WARNING: %s" % msg)
            return False

        # Finalize book_mi
        book_mi.annotations = len(highlights)
        # Add book to books_db
        self.add_to_books_db(self.books_db, book_mi)
        self.annotated_book_list.append(book_mi)

        for dt in sorted(highlights):
            highlight_text = None
            if 'text' in highlights[dt]:
                highlight_text = highlights[dt]['text']
            note_text = None
            if 'note' in highlights[dt]:
                note_text = highlights[dt]['note']

            # Populate an AnnotationStruct
            a_mi = AnnotationStruct()
            a_mi.annotation_id = dt
            a_mi.book_id = book_mi['book_id']
            a_mi.highlight_color = highlights[dt]['color']
            a_mi.highlight_text = highlight_text
            a_mi.location = highlights[dt]['page']
            a_mi.last_modification = dt
            a_mi.note_text = note_text

            # Location sort: roman-numeral pages go in the fractional part so
            # they sort ahead of arabic page 1
            page_literal = re.match(r'^Page (?P<page>[0-9ivx]+).*$',
                                    a_mi.location).group('page')
            if re.match('[IXVL]', page_literal.upper()):
                whole = 0
                decimal = self._roman_to_int(page_literal)
            else:
                whole = int(page_literal)
                decimal = 0
            a_mi.location_sort = "%05d.%05d" % (whole, decimal)

            # Add annotation
            self.add_to_annotations_db(self.annotations_db, a_mi)
            self.update_book_last_annotation(self.books_db, dt,
                                             book_mi['book_id'])

        # Update the timestamp
        self.update_timestamp(self.annotations_db)
        self.update_timestamp(self.books_db)
        self.commit()

        return True
コード例 #8
0
        def _process_individual_book(book):
            """
            Store one book element and its active highlights.

            Builds a BookStruct from the attributes of book (using the
            enclosing scope's xl mapping of source attribute name ->
            BookStruct key), adds it to self.books_db when it has any
            highlight children, and adds one AnnotationStruct per
            non-deleted, non-empty highlight to self.annotations_db.

            Books whose genre contains 'News' are skipped entirely unless
            self.collect_news_clippings is set. Returns None.
            """
            book_mi = BookStruct()
            book_mi['reader_app'] = self.app_name
            book_mi['cid'] = None
            for md in xl:
                book_mi[xl[md]] = book.get(md)
            book_mi['active'] = True
            book_mi['annotations'] = 0
            subjects = book.find('subjects')
            if subjects is not None:
                sl = [s.text for s in subjects]
                book_mi['genre'] = ', '.join(sl)

            this_is_news = False
            if 'News' in book_mi['genre']:
                if not self.collect_news_clippings:
                    return
                this_is_news = True

            # Get the last update, count active annotations
            last_update = 0.0
            hls = book.find('highlights')
            if hls is None:
                # No highlights element: treat as a book without highlights
                hls = []
            for hl in hls:
                # The attribute is a string; compare numerically rather than
                # str-vs-int (TypeError on Python 3) or lexicographically
                this_ts = hl.get('datetime')
                if this_ts is not None and float(this_ts) > last_update:
                    last_update = float(this_ts)
                if hl.get('deleted') == '0':
                    book_mi['annotations'] += 1
            book_mi['last_update'] = last_update

            # Get the library cid, confidence
            toc_entries = None
            if this_is_news:
                cid = self.news_clippings_cid
                confidence = 5
            else:
                cid, confidence = self.parent.generate_confidence(book_mi)
                if confidence >= 2:
                    toc_entries = self._get_epub_toc(cid=cid)

            # Add annotated book to the db, master_list
            if len(hls):
                self.add_to_books_db(self.books_db, book_mi)
                self.annotated_book_list.append(book_mi)

                # Add the active annotations for this book to the db
                highlights = {}
                for hl in hls:
                    if hl.get('deleted') == '1':
                        continue
                    ts = hl.get('datetime')
                    highlights[ts] = {}
                    for md in ['text', 'note', 'color', 'key', 'deleted', 'section',
                               'startx', 'startoffset']:
                        highlights[ts][md] = hl.get(md)

                for ts in sorted(highlights):
                    entry = highlights[ts]
                    highlight_text = entry['text']
                    note_text = entry['note']

                    # Populate an AnnotationStruct
                    a_mi = AnnotationStruct()
                    a_mi.annotation_id = entry['key']
                    a_mi.book_id = book_mi['book_id']
                    a_mi.highlight_color = self.HIGHLIGHT_COLORS[int(entry['color'])]
                    a_mi.highlight_text = highlight_text
                    a_mi.last_modification = ts
                    try:
                        section = str(int(entry['section']) - 1)
                        a_mi.location = toc_entries[section]
                    except Exception:
                        # No usable TOC entry (toc_entries may be None, or the
                        # section may be missing): fall back to a generic label
                        if this_is_news:
                            a_mi.location = book_mi['title']
                        else:
                            a_mi.location = "Section %s" % entry['section']
                    a_mi.note_text = note_text

                    # If empty highlight_text and empty note_text, not a useful annotation
                    # (a missing attribute comes back as None, so guard before strip)
                    if (not (highlight_text or '').strip() and
                            not (note_text or '').strip()):
                        continue

                    # Generate location_sort
                    if this_is_news:
                        a_mi.location_sort = ts
                    else:
                        interior = self._generate_interior_location_sort(entry['startx'])
                        if not interior:
                            self._log("Marvin: unable to parse xpath:")
                            self._log(" %s" % entry['startx'])
                            self._log(a_mi)
                            continue

                        a_mi.location_sort = "%04d.%s.%04d" % (
                            int(entry['section']),
                            interior,
                            int(entry['startoffset']))

                    self.add_to_annotations_db(self.annotations_db, a_mi)
                    self.update_book_last_annotation(self.books_db, ts, book_mi['book_id'])

            # Update the timestamps
            self.update_timestamp(self.annotations_db)
            self.update_timestamp(self.books_db)
            self.commit()
コード例 #9
0
    def get_active_annotations(self):
        '''
        Store a fixed set of sample annotations in the annotations table.

        One AnnotationStruct is built per sample. Starred properties are
        always set; dashed properties are set when the sample provides them
        (a sample may carry highlight_text, note_text, or both).
          AnnotationStruct properties:
            annotation_id: an int uniquely identifying the annotation
           *book_id: The book this annotation is associated with
            highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
           -highlight_text: paragraphs of the highlight, joined with newlines
           *last_modification: The timestamp of the annotation
           -note_text: paragraphs of the note, joined with newlines
        '''
        def _stamp(*when):
            # Local-time epoch seconds for a (year, month, day, h, m, s) tuple
            return time.mktime(datetime.datetime(*when).timetuple())

        # Sample annotations as (creation time, properties) pairs
        samples = (
            ((2012, 12, 4, 8, 15, 0), {
                'book_id': 1,
                'highlight_color': 'Gray',
                'highlight_text': ['The first paragraph of the first highlight.',
                                   'The second paragaph of the first highlight.'],
            }),
            ((2012, 12, 4, 8, 16, 0), {
                'book_id': 1,
                'highlight_color': 'Gray',
                'highlight_text': ['The first paragraph of the second highlight.',
                                   'The second paragaph of the second highlight.'],
                'note_text': ['A note added to the second highlight'],
            }),
            ((2012, 12, 4, 8, 17, 0), {
                'book_id': 1,
                'highlight_color': 'Gray',
                'note_text': ['A note added to the third highlight'],
            }),
            ((2012, 12, 10, 9, 0, 0), {
                'book_id': 2,
                'highlight_color': 'Gray',
                'highlight_text': ['The first paragraph of the first highlight.',
                                   'The second paragaph of the first highlight.'],
            }),
            ((2012, 12, 10, 9, 1, 0), {
                'book_id': 2,
                'highlight_color': 'Gray',
                'highlight_text': ['The first paragraph of the second highlight.',
                                   'The second paragaph of the second highlight.'],
                'note_text': ['A note added to the second highlight'],
            }),
            ((2012, 12, 10, 9, 2, 0), {
                'book_id': 2,
                'highlight_color': 'Gray',
                'note_text': ['A note added to the third highlight'],
            }),
            ((2012, 12, 31, 23, 59, 0), {
                'book_id': 999,
                'highlight_color': 'Gray',
                'highlight_text': ['An orphan annotation (no book)'],
            }),
        )

        # Index the samples by timestamp
        dict_of_anns = {}
        for when, properties in samples:
            dict_of_anns[_stamp(*when)] = properties

        self._log("%s:get_active_annotations()" % self.app_name)

        progress = self.opts.pb
        progress.set_label("Getting active annotations for %s" % self.app_name)
        progress.set_value(0)

        annotations_db = self.generate_annotations_db_name(self.app_name_, self.opts.device_name)
        books_db = self.generate_books_db_name(self.app_name_, self.opts.device_name)

        # Make sure the destination table exists
        self.create_annotations_table(annotations_db)

        # Reset the progress bar for the per-annotation pass
        progress.set_label("Getting highlights from %s" % self.app_name)
        progress.set_value(0)
        progress.set_maximum(len(dict_of_anns))

        # Store the samples oldest first
        for timestamp in sorted(dict_of_anns):
            sample = dict_of_anns[timestamp]
            ann_mi = AnnotationStruct()

            # Always present
            ann_mi.book_id = sample['book_id']
            ann_mi.last_modification = timestamp

            # Copied verbatim when present
            for prop in ('annotation_id', 'highlight_color'):
                if prop in sample:
                    setattr(ann_mi, prop, sample[prop])

            # Paragraph lists are flattened to newline-separated text
            for prop in ('highlight_text', 'note_text'):
                if prop in sample:
                    setattr(ann_mi, prop, '\n'.join(sample[prop]))

            self.add_to_annotations_db(annotations_db, ann_mi)
            progress.increment()

            # Record this annotation's time against its book
            self.update_book_last_annotation(books_db, timestamp, ann_mi.book_id)

        # Stamp the table and persist
        self.update_timestamp(annotations_db)
        self.commit()
コード例 #10
0
    def get_active_annotations(self):
        """
        Fetch active iBooks annotations from AEAnnotation_*.sqlite

        Unless caching is disabled or the cached table is stale, nothing is
        fetched. Otherwise every row of ZAEANNOTATION with
        ZANNOTATIONDELETED = 0 and ZANNOTATIONTYPE = 2 that belongs to a book
        in self.installed_books is converted to an AnnotationStruct and added
        to the cached annotations table, and the owning book's last
        annotation time is updated. Returns None.
        """
        self._log("%s:get_active_annotations()" % self.app_name)

        self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
        self.opts.pb.set_value(0)

        db_profile = self._localize_database_path(self.app_id, self.annotations_subpath)
        self.annotations_db = db_profile['path']

        # Test timestamp against cached value
        cached_db = self.generate_annotations_db_name(self.app_name_, self.ios.device_name)
        books_db = self.generate_books_db_name(self.app_name_, self.ios.device_name)

        if self.opts.disable_caching or not self._cache_is_current(db_profile['stats'], cached_db):
            self._log(" fetching annotations from %s on %s" % (self.app_name, self.ios.device_name))

            # Create the annotations table as needed
            self.create_annotations_table(cached_db)

            con = sqlite3.connect(self.annotations_db)
            try:
                with con:
                    con.row_factory = sqlite3.Row
                    cur = con.cursor()
                    cur.execute('''SELECT
                                    ZANNOTATIONASSETID,
                                    ZANNOTATIONLOCATION,
                                    ZANNOTATIONMODIFICATIONDATE,
                                    ZANNOTATIONNOTE,
                                    ZANNOTATIONSELECTEDTEXT,
                                    ZANNOTATIONSTYLE,
                                    ZANNOTATIONUUID
                                   FROM ZAEANNOTATION
                                   WHERE ZANNOTATIONDELETED = 0 and ZANNOTATIONTYPE = 2
                                   ORDER BY ZANNOTATIONMODIFICATIONDATE
                                ''')
                    rows = cur.fetchall()
                    self.opts.pb.set_maximum(len(rows))
                    for row in rows:
                        self.opts.pb.increment()
                        book_id = row[b'ZANNOTATIONASSETID']
                        if book_id not in self.installed_books:
                            continue

                        # Collect the metadata

                        # Sanitize text, note to unicode
                        # (selected text may be NULL, e.g. a note-only annotation)
                        selected_text = row[b'ZANNOTATIONSELECTEDTEXT'] or ''
                        highlight_text = re.sub('\xa0', ' ', selected_text)
                        highlight_text = UnicodeDammit(highlight_text).unicode
                        highlight_text = highlight_text.rstrip('\n').split('\n')
                        while highlight_text.count(''):
                            highlight_text.remove('')
                        highlight_text = [line.strip() for line in highlight_text]

                        # Only the first line of a note is kept
                        note_text = None
                        if row[b'ZANNOTATIONNOTE']:
                            note_text = UnicodeDammit(row[b'ZANNOTATIONNOTE']).unicode
                            note_text = note_text.rstrip('\n').split('\n')[0]

                        # Stored date plus the self.NSTimeIntervalSince1970 offset
                        modified = row[b'ZANNOTATIONMODIFICATIONDATE'] + self.NSTimeIntervalSince1970

                        # Populate an AnnotationStruct
                        a_mi = AnnotationStruct()
                        a_mi.annotation_id = row[b'ZANNOTATIONUUID']
                        a_mi.book_id = book_id
                        a_mi.epubcfi = row[b'ZANNOTATIONLOCATION']
                        a_mi.highlight_color = self.HIGHLIGHT_COLORS[row[b'ZANNOTATIONSTYLE']]
                        a_mi.highlight_text = '\n'.join(highlight_text)
                        a_mi.last_modification = modified

                        is_news = (self.collect_news_clippings and
                                   'News' in self.get_genres(books_db, book_id))
                        if a_mi.epubcfi:
                            section = self._get_spine_index(a_mi.epubcfi)
                            try:
                                a_mi.location = self.tocs[book_id]["%.0f" % (section - 1)]
                            except Exception:
                                # No TOC entry for this section: generic label
                                a_mi.location = "Section %d" % section
                            if is_news:
                                a_mi.location_sort = a_mi.last_modification
                            else:
                                a_mi.location_sort = self._generate_location_sort(a_mi.epubcfi)
                        elif is_news:
                            # News clippings without a CFI sort by time
                            a_mi.location = self.get_title(books_db, book_id)
                            a_mi.location_sort = a_mi.last_modification

                        a_mi.note_text = note_text

                        # Add annotation
                        self.add_to_annotations_db(cached_db, a_mi)

                        # Update last_annotation in books_db
                        self.update_book_last_annotation(books_db, modified, book_id)

                    self.update_timestamp(cached_db)
                    self.commit()
            finally:
                # 'with con' only scopes the transaction; close explicitly
                con.close()

        else:
            self._log(" retrieving cached annotations from %s" % cached_db)
コード例 #11
0
    def get_active_annotations(self):
        """
        Fetch active annotations from the reader app's Highlights table.

        Unless caching is disabled or the cached table is stale, nothing is
        fetched. Otherwise each Highlights row that belongs to a book in
        self.installed_books, is not flagged Deleted, and has highlight or
        note text is converted to an AnnotationStruct and added to the cached
        annotations table, and the owning book's last annotation time is
        updated. Returns None.
        """
        self._log("%s:get_active_annotations()" % self.app_name)

        self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
        self.opts.pb.set_value(0)

        db_profile = self._localize_database_path(self.app_id, self.annotations_subpath)
        self.annotations_db = db_profile['path']

        # Test timestamp against cached value
        cached_db = self.generate_annotations_db_name(self.app_name_, self.ios.device_name)
        books_db = self.generate_books_db_name(self.app_name_, self.ios.device_name)

        if self.opts.disable_caching or not self._cache_is_current(db_profile['stats'], cached_db):
            self._log(" fetching annotations from %s on %s" % (self.app_name, self.ios.device_name))

            # Create the annotations table as needed
            self.create_annotations_table(cached_db)
            obsolete_bookmarks = 0
            deleted_bookmarks = 0

            con = sqlite3.connect(self.annotations_db)
            try:
                with con:
                    con.row_factory = sqlite3.Row
                    cur = con.cursor()
                    cur.execute('''SELECT * FROM Highlights
                                   ORDER BY NoteDateTime
                                ''')
                    rows = cur.fetchall()
                    self.opts.pb.set_maximum(len(rows))
                    for row in rows:
                        self.opts.pb.increment()

                        book_id = row[b'BookID']
                        if book_id not in self.installed_books:
                            obsolete_bookmarks += 1
                            continue

                        # Collect the markup/highlight count for all installed books
                        if row[b'Deleted'] == 1:
                            deleted_bookmarks += 1
                            continue

                        this_is_news = self.collect_news_clippings and 'News' in self.get_genres(books_db, book_id)

                        # Sanitize text, note to unicode
                        # (Text may be NULL, e.g. a note-only annotation)
                        highlight_text = re.sub('\xa0', ' ', row[b'Text'] or '')
                        highlight_text = UnicodeDammit(highlight_text).unicode
                        highlight_text = highlight_text.rstrip('\n').split('\n')
                        while highlight_text.count(''):
                            highlight_text.remove('')
                        highlight_text = [line.strip() for line in highlight_text]

                        note_text = None
                        if row[b'Note']:
                            ntu = UnicodeDammit(row[b'Note']).unicode
                            note_text = ntu.rstrip('\n')

                        # Populate an AnnotationStruct
                        a_mi = AnnotationStruct()
                        a_mi.annotation_id = row[b'UUID']
                        a_mi.book_id = book_id
                        a_mi.highlight_color = self.HIGHLIGHT_COLORS[row[b'Colour']]
                        a_mi.highlight_text = '\n'.join(highlight_text)
                        a_mi.last_modification = row[b'NoteDateTime']

                        # TOC keys are looked up one below the stored Section
                        section = str(int(row[b'Section']) - 1)
                        try:
                            a_mi.location = self.tocs[book_id][section]
                        except Exception:
                            # No TOC entry for this section: fall back
                            if this_is_news:
                                a_mi.location = self.get_title(books_db, book_id)
                            else:
                                a_mi.location = "Section %s" % row[b'Section']

                        a_mi.note_text = note_text

                        # If empty highlight_text and empty note_text, not a useful annotation
                        if not highlight_text and not note_text:
                            continue

                        # Generate location_sort
                        if this_is_news:
                            a_mi.location_sort = row[b'NoteDateTime']
                        else:
                            interior = self._generate_interior_location_sort(row[b'StartXPath'])
                            if not interior:
                                self._log("Marvin: unable to parse xpath:")
                                self._log(row[b'StartXPath'])
                                self._log(a_mi)
                                continue

                            a_mi.location_sort = "%04d.%s.%04d" % (
                                int(row[b'Section']),
                                interior,
                                int(row[b'StartOffset']))

                        # Add annotation
                        self.add_to_annotations_db(cached_db, a_mi)

                        # Update last_annotation in books_db
                        self.update_book_last_annotation(books_db, row[b'NoteDateTime'], book_id)

                    # Report the rows that were skipped above
                    self._log(" skipped %d obsolete and %d deleted bookmarks" %
                              (obsolete_bookmarks, deleted_bookmarks))

                    # Update the timestamp
                    self.update_timestamp(cached_db)
                    self.commit()
            finally:
                # 'with con' only scopes the transaction; close explicitly
                con.close()

        else:
            self._log(" retrieving cached annotations from %s" % cached_db)
コード例 #12
0
    def capture_content(self, uas, book_id, transient_db):
        '''
        Store a set of annotations to the transient table

        uas: iterable of parsed markup nodes; only 'div' elements whose class
            equals "annotation" are stored, NavigableStrings are ignored.
        book_id: stored on every captured annotation.
        transient_db: name of the transient table, created if needed.

        Each optional property falls back to a default when it cannot be read
        from the element (best effort); highlight_text / note_text are left
        unset if their paragraphs cannot be read at all.
        '''
        self.create_annotations_transient_table(transient_db)
        self._log_location(book_id, uas)
        for ua in uas:
            if isinstance(ua, NavigableString):
                continue
            if ua.name != 'div' or ua['class'] != "annotation":
                continue
            this_ua = AnnotationStruct()
            this_ua.book_id = book_id
            this_ua.hash = ua['hash']
            try:
                this_ua.genre = ua['genre']
            except Exception:
                this_ua.genre = None

            try:
                this_ua.highlight_color = ua.find('table')['color']
            except Exception:
                this_ua.highlight_color = 'gray'

            try:
                this_ua.reader = ua['reader']
            except Exception:
                this_ua.reader = ''

            try:
                this_ua.last_modification = ua.find('td', 'timestamp')['uts']
            except Exception:
                this_ua.last_modification = "0"

            try:
                this_ua.location = ua.find('td', 'location').string
            except Exception:
                this_ua.location = ""

            try:
                this_ua.location_sort = ua['location_sort']
            except Exception:
                this_ua.location_sort = ""

            try:
                pels = ua.findAll('p', 'highlight')
                self._log_location(book_id, "highlight pels={0}".format(pels))
                # .string may be None; substitute '' so one such paragraph
                # does not discard the whole highlight
                this_ua.highlight_text = '\n'.join([p.string or '' for p in pels])
                self._log_location(book_id, "highlight - this_ua.highlight_text={0}".format(this_ua.highlight_text))
            except Exception:
                pass

            try:
                nels = ua.findAll('p', 'note')
                self._log_location(book_id, "note nels={0}".format(nels))
                # Same None guard as for the highlight paragraphs
                this_ua.note_text = '\n'.join([n.string or '' for n in nels])
                self._log_location(book_id, "highlight - this_ua.note_text={0}".format(this_ua.note_text))
            except Exception:
                pass

            self.add_to_transient_db(transient_db, this_ua)
コード例 #13
0
    def capture_content(self, uas, book_id, transient_db):
        '''
        Build AnnotationStruct objects from a sequence of parsed markup nodes.

        Creates the transient annotations table named by transient_db, then
        walks uas. Only <div> elements whose first class is "annotation" are
        converted; NavigableStrings and all other elements are skipped.

          uas: iterable of parsed nodes (tags and NavigableStrings)
          book_id: stored on every AnnotationStruct and passed to the logger
          transient_db: name handed to create_annotations_transient_table()

        Returns the list of AnnotationStruct objects, in input order. The
        'hash' attribute is read unguarded from each annotation div; every
        other field falls back to a default (or stays unset for
        highlight_text/note_text) when it cannot be extracted.
        '''
        self.create_annotations_transient_table(transient_db)
        self._log_location(book_id, uas)
        annotation_list = []
        for ua in uas:
            self._log_location(book_id, ua)
            if isinstance(ua, NavigableString):
                continue
            if ua.name != 'div':
                continue
            # A div without a class attribute is not an annotation. Using
            # get() skips it instead of raising and aborting the whole capture.
            classes = ua.get('class')
            if not classes or classes[0] != "annotation":
                continue

            this_ua = AnnotationStruct()
            this_ua.book_id = book_id
            this_ua.hash = ua['hash']

            # Optional fields are best effort. 'except Exception' keeps that
            # behavior without also swallowing KeyboardInterrupt/SystemExit.
            try:
                this_ua.genre = ua['genre']
            except Exception:
                this_ua.genre = None

            try:
                this_ua.highlight_color = ua.find('table')['color']
            except Exception:
                this_ua.highlight_color = 'gray'

            try:
                this_ua.reader = ua['reader']
            except Exception:
                this_ua.reader = ''

            try:
                this_ua.last_modification = ua.find('td', 'timestamp')['uts']
            except Exception:
                this_ua.last_modification = "0"

            try:
                this_ua.location = ua.find('td', 'location').string
            except Exception:
                this_ua.location = ""

            try:
                this_ua.location_sort = ua['location_sort']
            except Exception:
                this_ua.location_sort = ""

            try:
                pels = ua.findAll('p', 'highlight')
                self._log_location(book_id, "highlight pels={0}".format(pels))
                # p.string is None for a paragraph with nested markup
                this_ua.highlight_text = '\n'.join([p.string or '' for p in pels])
                self._log_location(book_id, "highlight - this_ua.highlight_text={0}".format(this_ua.highlight_text))
            except Exception:
                # Best effort: highlight_text is left as AnnotationStruct provides it
                pass

            try:
                nels = ua.findAll('p', 'note')
                self._log_location(book_id, "note nels={0}".format(nels))
                this_ua.note_text = '\n'.join([n.string or '' for n in nels])
                self._log_location(book_id, "note - this_ua.note_text={0}".format(this_ua.note_text))
            except Exception:
                # Best effort: note_text is left as AnnotationStruct provides it
                pass

            self._log_location(book_id, this_ua)
            annotation_list.append(this_ua)
        return annotation_list
コード例 #14
0
    def parse_exported_highlights(self, raw, log_failure=True):
        """
        Extract highlights from a pasted Annotations summary and add them to
        the book currently selected in the calibre library view.

        Expected layout of raw, one item per line:
            title
            author
            publisher                        (read past, not stored)
            <timestamp> (Page <n>...)
            <highlight text>
            Notes: <note text>               (optional)
            ...further timestamp/highlight/note groups...
        Parsing stops at the first line after an annotation that is not a
        timestamp/location line.

        A BookStruct is populated with:
            active, author, book_id, title, uuid, last_update, reader_app,
            cid, annotations (the number of parsed annotations)

        Each AnnotationStruct is populated with:
            annotation_id: index of the line following the annotation
            book_id: id of the selected calibre book
            highlight_color: always 'Yellow'
            highlight_text: the single line following the timestamp line
            last_modification: timestamp, made unique in 60 second steps
            location: the 'Page ...' text from the timestamp line
            location_sort: zero-padded page number
            note_text: text after 'Notes: ', or None

        raw: the pasted summary text
        log_failure: when True, a parse failure is logged and shown in a
                     warning dialog

        Returns True when the annotations were stored, False when raw could
        not be parsed (including when it contains no annotations).
        """
        # Create the annotations, books table as needed
        self.annotations_db = "%s_imported_annotations" % self.app_name_
        self.create_annotations_table(self.annotations_db)
        self.books_db = "%s_imported_books" % self.app_name_
        self.create_books_table(self.books_db)

        self.annotated_book_list = []
        self.selected_books = None

        # Generate the book metadata from the selected book
        row = self.opts.gui.library_view.currentIndex()
        book_id = self.opts.gui.library_view.model().id(row)
        db = self.opts.gui.current_db
        mi = db.get_metadata(book_id, index_is_id=True)

        try:
            lines = raw.split('\n')
            if len(lines) < 5:
                raise AnnotationsException("Invalid annotations summary")
            annotations = {}

            # The first three lines are title, author and publisher
            title = lines[0]
            author = lines[1]
            index = 3

            tsl_re = re.compile(r'^(?P<timestamp>.*) \((?P<location>Page .*)\)')
            note_re = re.compile(r'^Notes: (?P<note_text>.*)')
            page_re = re.compile(r'^Page (?P<page>\d+).*$')

            # Each pass consumes one timestamp/location line, its highlight
            # line and an optional note line
            while index < len(lines):
                tsl = tsl_re.match(lines[index])
                if not tsl:
                    break

                isoformat = parse_date(tsl.group('timestamp'), as_utc=False)
                isoformat = isoformat.replace(hour=12)
                timestamp = mktime(isoformat.timetuple())
                # Annotations are keyed by timestamp: keep the key unique
                while timestamp in annotations:
                    timestamp += 60

                location = tsl.group('location')
                index += 1

                # A timestamp line with no highlight line after it is
                # malformed: the IndexError is reported as a parse failure
                highlight_text = lines[index]
                index += 1

                # The note is optional and the summary may end right here
                note_text = None
                if index < len(lines):
                    note = note_re.match(lines[index])
                    if note:
                        note_text = note.group('note_text')
                        index += 1

                ann = AnnotationStruct()
                ann.book_id = mi.id
                ann.annotation_id = index
                ann.highlight_color = 'Yellow'
                ann.highlight_text = highlight_text
                ann.location = location
                ann.location_sort = "%05d" % int(page_re.match(location).group('page'))
                ann.note_text = note_text
                ann.last_modification = timestamp
                annotations[timestamp] = ann

            if not annotations:
                raise AnnotationsException("No annotations found in summary")
        except Exception:
            if log_failure:
                self._log(" unable to parse %s Annotations" % self.app_name)
                self._log("{:~^80}".format(" Imported Annotation summary "))
                self._log(raw)
                self._log("{:~^80}".format(" end imported Annotations summary "))
                import traceback
                traceback.print_exc()
                msg = ('Unable to parse Annotation summary from %s. ' % self.app_name +
                    'Paste entire contents of emailed summary.')
                MessageBox(MessageBox.WARNING,
                    'Error importing annotations',
                    msg,
                    show_copy_button=False,
                    parent=self.opts.gui).exec_()
                self._log_location("WARNING: %s" % msg)
            return False

        # Populate a BookStruct
        book_mi = BookStruct()
        book_mi.active = True
        book_mi.author = author
        book_mi.book_id = mi.id
        book_mi.title = title
        book_mi.uuid = None
        book_mi.last_update = time.mktime(time.localtime())
        book_mi.reader_app = self.app_name
        book_mi.cid = mi.id
        book_mi.annotations = len(annotations)

        # Add book to books_db
        self.add_to_books_db(self.books_db, book_mi)
        self.annotated_book_list.append(book_mi)

        # Add the annotations in timestamp order
        for timestamp in sorted(annotations):
            self.add_to_annotations_db(self.annotations_db, annotations[timestamp])
            self.opts.pb.increment()
            self.update_book_last_annotation(self.books_db, timestamp, mi.id)

        # Update the timestamp
        self.update_timestamp(self.annotations_db)
        self.update_timestamp(self.books_db)
        self.commit()

        # Return True if successful
        return True
コード例 #15
0
    def parse_exported_highlights(self, raw, log_failure=True):
        """
        Extract highlights from a pasted GoodReader Annotation summary email
        and add them to the book currently selected in the calibre library.

        The title is taken from the 'File: ' line at the front of raw. Each
        '--- Page <n> ---' line starts a page; the lines after it are matched
        against self.ANNOTATION_TYPES and self.SKIP_TYPES prefixes. Parsing
        ends at the '(report generated by GoodReader)' line or at the end of
        the input.

        raw: the pasted summary text
        log_failure: when True, a parse failure is logged and shown in a
                     warning dialog

        Return True if no problems
        Return False if error
        """
        # Create the annotations, books table as needed
        self.annotations_db = "%s_imported_annotations" % self.app_name_
        self.create_annotations_table(self.annotations_db)
        self.books_db = "%s_imported_books" % self.app_name_
        self.create_books_table(self.books_db)

        self.annotated_book_list = []
        self.selected_books = None

        # Generate the book metadata from the selected book
        row = self.opts.gui.library_view.currentIndex()
        book_id = self.opts.gui.library_view.model().id(row)
        db = self.opts.gui.current_db
        mi = db.get_metadata(book_id, index_is_id=True)

        # Grab the title from the front of raw
        try:
            title = re.match(r'(?m)File: (?P<title>.*)$', raw).group('title')

            # Populate a BookStruct
            book_mi = BookStruct()
            book_mi.active = True
            book_mi.author = 'Unknown'
            book_mi.book_id = mi.id
            book_mi.title = title
            book_mi.uuid = None
            book_mi.last_update = time.mktime(time.localtime())
            book_mi.reader_app = self.app_name
            book_mi.cid = mi.id

            gr_annotations = raw.split('\n')
            num_lines = len(gr_annotations)
            highlights = {}

            # The patterns do not change while parsing: build them once
            page_re = re.compile(r'--- (Page \w+) ---')
            ann_type_re = re.compile(
                '^(?P<ann_type>{0})'.format('|'.join(self.ANNOTATION_TYPES)))
            any_type_re = re.compile(
                '^(?P<ann_type>{0})'.format('|'.join(self.ANNOTATION_TYPES + self.SKIP_TYPES)))

            # Find the first annotation. Running off the end of the list
            # raises IndexError, which is reported as a parse failure below.
            i = 0
            line = gr_annotations[i]
            while not line.startswith('--- Page'):
                i += 1
                line = gr_annotations[i]

            while i < num_lines and not line.startswith('(report generated by GoodReader)'):
                # Extract the page number
                page_num = page_re.search(line)
                if page_num:
                    page_num = page_num.group(1)

                    # Extract the highlight
                    i += 1
                    line = gr_annotations[i]

                    prefix = None
                    while True:
                        prefix = any_type_re.search(line)
                        if prefix and prefix.group('ann_type') in self.SKIP_TYPES:
                            # Skipped type: advance to the next annotation type
                            i += 1
                            line = gr_annotations[i]
                            while not ann_type_re.search(line):
                                i += 1
                                line = gr_annotations[i]
                            continue
                        elif prefix:
                            break
                        else:
                            i += 1
                            line = gr_annotations[i]

                    annotation = self._extract_highlight(line, prefix.group('ann_type'))
                    annotation.page_num = page_num

                    # Get the annotation(s)
                    i += 1
                    line = gr_annotations[i]
                    ann = ''
                    while i < num_lines \
                        and not line.startswith('--- Page') \
                        and not line.startswith('(report generated by GoodReader)'):

                        if line:
                            prefix = any_type_re.search(line)
                            if prefix and prefix.group('ann_type') in self.SKIP_TYPES:
                                # Continue until next ann_type
                                i += 1
                                line = gr_annotations[i]
                                while not ann_type_re.search(line):
                                    i += 1
                                    if i == num_lines:
                                        break
                                    line = gr_annotations[i]
                                continue
                            elif prefix:
                                # Additional highlight on the same page
                                # write current annotation, start new annotation
                                self._store_annotation(highlights, annotation)
                                annotation = self._extract_highlight(line, prefix.group('ann_type'))
                                annotation.page_num = page_num
                                annotation.ann_type = prefix.group('ann_type')
                                ann = ''
                                i += 1
                                line = gr_annotations[i]
                                continue

                            if not ann:
                                ann = line
                            else:
                                ann += '\n' + line
                        i += 1
                        line = gr_annotations[i]
                        annotation.ann = ann

                    # Back up so that the next line is '--- Page' or '(report generated'
                    i -= 1
                    self._store_annotation(highlights, annotation)

                i += 1
                if i == num_lines:
                    break
                line = gr_annotations[i]
        except Exception:
            if log_failure:
                self._log(" unable to parse GoodReader Annotation summary")
                self._log("{:~^80}".format(" Imported Annotation summary "))
                self._log(raw)
                self._log("{:~^80}".format(" end imported Annotations summary "))
                import traceback
                traceback.print_exc()
                msg = ('Unable to parse Annotation summary from %s. ' % self.app_name +
                    'Paste entire contents of emailed summary.')
                MessageBox(MessageBox.WARNING,
                    'Error importing annotations',
                    msg,
                    show_copy_button=False,
                    parent=self.opts.gui).exec_()
                self._log_location("WARNING: %s" % msg)
            return False

        # Finalize book_mi
        book_mi.annotations = len(highlights)
        # Add book to books_db
        self.add_to_books_db(self.books_db, book_mi)
        self.annotated_book_list.append(book_mi)

        # sorted() over the mapping iterates its keys on Python 2 and 3 alike
        for dt in sorted(highlights):
            entry = highlights[dt]
            highlight_text = None
            if 'text' in entry:
                highlight_text = entry['text']
            note_text = None
            if 'note' in entry:
                note_text = entry['note']

            # Populate an AnnotationStruct
            a_mi = AnnotationStruct()
            a_mi.annotation_id = dt
            a_mi.book_id = book_mi['book_id']
            a_mi.highlight_color = entry['color']
            a_mi.highlight_text = highlight_text
            a_mi.location = entry['page']
            a_mi.last_modification = dt
            a_mi.note_text = note_text

            # Location sort: roman-numeral pages sort ahead of arabic pages
            # (whole part 0, numeral value in the decimal part)
            page_literal = re.match(r'^Page (?P<page>[0-9ivx]+).*$', a_mi.location).group('page')
            if re.match('[IXVL]', page_literal.upper()):
                whole = 0
                decimal = self._roman_to_int(page_literal)
            else:
                whole = int(page_literal)
                decimal = 0
            a_mi.location_sort = "%05d.%05d" % (whole, decimal)

            # Add annotation
            self.add_to_annotations_db(self.annotations_db, a_mi)
            self.update_book_last_annotation(self.books_db, dt, book_mi['book_id'])

        # Update the timestamp
        self.update_timestamp(self.annotations_db)
        self.update_timestamp(self.books_db)
        self.commit()

        return True
コード例 #16
0
ファイル: _iBooks.py プロジェクト: wold5/calibre-annotations
    def get_active_annotations(self):
        """
        Fetch active iBooks annotations from AEAnnotation_*.sqlite

        Localizes the annotations database, and unless a current cache exists
        (or caching is disabled) reads every non-deleted row of type 2 from
        ZAEANNOTATION, ordered by modification date. For each row whose asset
        id is in self.installed_books an AnnotationStruct is written to the
        cached annotations table and the book's last annotation time is
        updated. When the cache is current only a log line is written.
        """
        self._log("%s:get_active_annotations()" % self.app_name)

        self.opts.pb.set_label("Getting active annotations for %s" %
                               self.app_name)
        self.opts.pb.set_value(0)

        db_profile = self._localize_database_path(self.app_id,
                                                  self.annotations_subpath)
        self.annotations_db = db_profile['path']

        # Test timestamp against cached value
        cached_db = self.generate_annotations_db_name(self.app_name_,
                                                      self.ios.device_name)
        books_db = self.generate_books_db_name(self.app_name_,
                                               self.ios.device_name)

        if self.opts.disable_caching or not self._cache_is_current(
                db_profile['stats'], cached_db):
            self._log(" fetching annotations from %s on %s" %
                      (self.app_name, self.ios.device_name))

            # Create the annotations table as needed
            self.create_annotations_table(cached_db)

            # 'with con:' only scopes a transaction and never closes the
            # connection. This connection is read-only, so close it
            # explicitly instead.
            con = sqlite3.connect(self.annotations_db)
            try:
                con.row_factory = sqlite3.Row
                cur = con.cursor()
                cur.execute('''SELECT
                                ZANNOTATIONASSETID,
                                ZANNOTATIONLOCATION,
                                ZANNOTATIONMODIFICATIONDATE,
                                ZANNOTATIONNOTE,
                                ZANNOTATIONSELECTEDTEXT,
                                ZANNOTATIONSTYLE,
                                ZANNOTATIONUUID
                               FROM ZAEANNOTATION
                               WHERE ZANNOTATIONDELETED = 0 and ZANNOTATIONTYPE = 2
                               ORDER BY ZANNOTATIONMODIFICATIONDATE
                            ''')
                rows = cur.fetchall()
                self.opts.pb.set_maximum(len(rows))
                for row in rows:
                    self.opts.pb.increment()
                    book_id = row[b'ZANNOTATIONASSETID']
                    if book_id not in self.installed_books:
                        continue

                    # Collect the metadata

                    # Sanitize text, note to unicode. The column may be NULL,
                    # which re.sub() cannot take.
                    highlight_text = re.sub('\xa0', ' ',
                                            row[b'ZANNOTATIONSELECTEDTEXT'] or '')
                    highlight_text = UnicodeDammit(highlight_text).unicode
                    # Drop empty lines first, then trim the remaining ones
                    highlight_text = [
                        line.strip()
                        for line in highlight_text.rstrip('\n').split('\n')
                        if line != ''
                    ]

                    # Only the first line of a note is kept
                    note_text = None
                    if row[b'ZANNOTATIONNOTE']:
                        note_text = UnicodeDammit(
                            row[b'ZANNOTATIONNOTE']).unicode
                        note_text = note_text.rstrip('\n').split('\n')[0]

                    # Stored date plus the NSTimeIntervalSince1970 offset
                    last_modification = row[
                        b'ZANNOTATIONMODIFICATIONDATE'] + self.NSTimeIntervalSince1970

                    # Populate an AnnotationStruct
                    a_mi = AnnotationStruct()
                    a_mi.annotation_id = row[b'ZANNOTATIONUUID']
                    a_mi.book_id = book_id
                    a_mi.epubcfi = row[b'ZANNOTATIONLOCATION']
                    a_mi.highlight_color = self.HIGHLIGHT_COLORS[
                        row[b'ZANNOTATIONSTYLE']]
                    a_mi.highlight_text = '\n'.join(highlight_text)
                    a_mi.last_modification = last_modification
                    if a_mi.epubcfi:
                        section = self._get_spine_index(a_mi.epubcfi)
                        try:
                            a_mi.location = self.tocs[book_id]["%.0f" %
                                                               (section - 1)]
                        except Exception:
                            # No TOC entry for this section: use a generic label
                            a_mi.location = "Section %d" % section
                        if self.collect_news_clippings and 'News' in self.get_genres(
                                books_db, book_id):
                            a_mi.location_sort = a_mi.last_modification
                        else:
                            a_mi.location_sort = self._generate_location_sort(
                                a_mi.epubcfi)
                    else:
                        if self.collect_news_clippings and 'News' in self.get_genres(
                                books_db, book_id):
                            a_mi.location = self.get_title(books_db, book_id)
                            a_mi.location_sort = a_mi.last_modification

                    a_mi.note_text = note_text

                    # Add annotation
                    self.add_to_annotations_db(cached_db, a_mi)

                    # Update last_annotation in books_db
                    self.update_book_last_annotation(
                        books_db, last_modification, book_id)

                self.update_timestamp(cached_db)
                self.commit()
            finally:
                con.close()

        else:
            self._log(" retrieving cached annotations from %s" % cached_db)
コード例 #17
0
ファイル: Tolino.py プロジェクト: wold5/calibre-annotations
    def get_active_annotations(self):
        '''
        Write one AnnotationStruct per collected Tolino annotation to the
        annotations table.

        self.active_annotations is reset, _parse_tolino_notes() is run, and
        the resulting entries are processed in ascending timestamp order.
        Per entry:
           *book_id: required, the book the annotation belongs to
           *last_modification: the entry's timestamp key
            annotation_id, highlight_color: copied when present
           -highlight_text: paragraphs joined with newlines, when present
           -note_text: paragraphs joined with newlines, when present
            location, location_sort: copied when present, unless news
              clippings are collected and the book's genres include 'News';
              then the book title and the timestamp are used instead
        '''
        self._log("%s:get_active_annotations()" % self.app_name)

        self.active_annotations = {}

        self.opts.pb.set_label("Getting active annotations for %s" %
                               self.app_name)
        self.opts.pb.set_value(0)

        # Don't change the template of the _db strings
        self.annotations_db = self.generate_annotations_db_name(
            self.app_name_, self.opts.device_name)
        self.books_db = self.generate_books_db_name(
            self.app_name_, self.opts.device_name)

        # Create the annotations table
        self.create_annotations_table(self.annotations_db)

        # Collect the notes (presumably fills self.active_annotations)
        self._parse_tolino_notes()

        # Initialize the progress bar
        self.opts.pb.set_label("Getting highlights from %s" % self.app_name)
        self.opts.pb.set_value(0)
        self.opts.pb.show()
        self.opts.pb.set_maximum(len(self.active_annotations))

        # Add annotations to the database, oldest first
        for timestamp in sorted(self.active_annotations):
            entry = self.active_annotations[timestamp]

            # Required items
            ann_mi = AnnotationStruct()
            ann_mi.book_id = entry['book_id']
            ann_mi.last_modification = timestamp

            this_is_news = self.collect_news_clippings and 'News' in self.get_genres(
                self.books_db, ann_mi.book_id)

            # Optional items
            for key in ('annotation_id', 'highlight_color'):
                if key in entry:
                    setattr(ann_mi, key, entry[key])

            if 'highlight_text' in entry:
                ann_mi.highlight_text = '\n'.join(entry['highlight_text'])

            if this_is_news:
                # News clippings are located by title and sorted by time
                ann_mi.location = self.get_title(self.books_db, ann_mi.book_id)
                ann_mi.location_sort = timestamp
            else:
                for key in ('location', 'location_sort'):
                    if key in entry:
                        setattr(ann_mi, key, entry[key])

            if 'note_text' in entry:
                ann_mi.note_text = '\n'.join(entry['note_text'])

            # Store the annotation, advance the progress bar, then record
            # the book's last annotation time
            self.add_to_annotations_db(self.annotations_db, ann_mi)
            self.opts.pb.increment()
            self.update_book_last_annotation(self.books_db, timestamp,
                                             ann_mi.book_id)

        self.opts.pb.hide()

        # Update the timestamp
        self.update_timestamp(self.annotations_db)
        self.commit()
コード例 #18
0
    def get_active_annotations(self):
        '''
        Write one AnnotationStruct per fetched annotation to the annotations
        table.

        _fetch_annotations() is run, then the values of
        self.active_annotations are processed ordered by book_id,
        location_sort and last_modification. Per entry:
           *book_id: required, the book the annotation belongs to
           *last_modification: required, the annotation's timestamp
            annotation_id, highlight_color, highlight_text, note_text,
            location, location_sort: copied unchanged when present
        Note that the ordering reads location_sort from every entry.
        '''

        self._log_location("Start!!!!")
        self._log("%s:get_active_annotations()" % self.app_name)

        self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
        self.opts.pb.set_value(0)

        annotations_db = self.generate_annotations_db_name(self.app_name_, self.opts.device_name)
        self.books_db = self.generate_books_db_name(self.app_name_, self.opts.device_name)
        self._log("%s:get_active_annotations() - annotations_db=%s, books_db=%s" % (self.app_name, annotations_db, self.books_db))

        # Create the annotations table
        self.create_annotations_table(annotations_db)

        self._fetch_annotations()

        # Initialize the progress bar
        self.opts.pb.set_label("Getting highlights from %s" % self.app_name)
        self.opts.pb.set_value(0)
        self.opts.pb.set_maximum(len(self.active_annotations))

        def ordering(entry):
            # Group by book, then by position in the book, then by time
            return (entry['book_id'], entry['location_sort'], entry['last_modification'])

        ordered = list(self.active_annotations.values())
        ordered.sort(key=ordering)

        # Copied onto the AnnotationStruct in this order, when present
        optional_fields = (
            'annotation_id',
            'highlight_color',
            'highlight_text',
            'note_text',
            'location',
            'location_sort',
        )

        # Add annotations to the database
        for entry in ordered:
            ann_mi = AnnotationStruct()

            # Required items
            ann_mi.book_id = entry['book_id']
            ann_mi.last_modification = entry['last_modification']

            # Optional items
            for field in optional_fields:
                if field in entry:
                    setattr(ann_mi, field, entry[field])

            # Store the annotation, advance the progress bar, then record
            # the book's last annotation time
            self.add_to_annotations_db(annotations_db, ann_mi)
            self.opts.pb.increment()
            self.update_book_last_annotation(self.books_db, ann_mi.last_modification, ann_mi.book_id)

        # Update the timestamp
        self.update_timestamp(annotations_db)
        self.commit()
        self._log_location("Finish!!!!")
コード例 #19
0
    def get_active_annotations(self):
        '''
        Store every entry of self.active_annotations in the annotations table.

        The method derives the annotations and books table names from
        self.app_name_ and self.opts.device_name, creates the annotations
        table, calls self._fetch_annotations() and then walks
        self.active_annotations (presumably populated by that call - confirm
        against _fetch_annotations) in sorted key order. For each entry an
        AnnotationStruct is populated, handed to add_to_annotations_db(), the
        progress bar is incremented, and update_book_last_annotation() is
        called for the owning book. Finally the annotations table timestamp
        is updated and the changes are committed.

        'book_id' and 'last_modification' are read unconditionally from each
        entry, so an entry lacking either raises KeyError. All other keys are
        copied only when present.

          AnnotationStruct properties:
            annotation_id: an int uniquely identifying the annotation
           *book_id: The book this annotation is associated with
            highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
           -highlight_text: A list of paragraphs constituting the highlight
            last_modification: The timestamp of the annotation
            location: location of highlight in the book
           -note_text: A list of paragraphs constituting the note
           *timestamp: Unique timestamp of highlight's creation/modification time

        Starred items are minimally required. Dashed items (highlight_text
        and note_text) may be one or both.
        '''

        self._log_location("Start!!!!")
        self._log("%s:get_active_annotations()" % self.app_name)

        self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
        self.opts.pb.set_value(0)

        annotations_db = self.generate_annotations_db_name(self.app_name_, self.opts.device_name)
        self.books_db = self.generate_books_db_name(self.app_name_, self.opts.device_name)
        self._log("%s:get_active_annotations() - annotations_db=%s, books_db=%s" % (self.app_name, annotations_db, self.books_db))

        # Create the annotations table
        self.create_annotations_table(annotations_db)

        self._fetch_annotations()
        # Initialize the progress bar, one step per annotation
        self.opts.pb.set_label("Getting highlights from %s" % self.app_name)
        self.opts.pb.set_value(0)
        self.opts.pb.set_maximum(len(self.active_annotations))

        # Add annotations to the database.
        # sorted() over the mapping yields its keys on both Python 2 and
        # Python 3; dict.iterkeys() is not available on Python 3.
        for annotation_id in sorted(self.active_annotations):
            # Look the entry up once instead of re-indexing for every field
            annotation = self.active_annotations[annotation_id]

            # Populate an AnnotationStruct with available data
            ann_mi = AnnotationStruct()

            # Required items
            ann_mi.book_id = annotation['book_id']
            ann_mi.last_modification = annotation['last_modification']

            # Optional items
            if 'annotation_id' in annotation:
                ann_mi.annotation_id = annotation['annotation_id']
            if 'highlight_color' in annotation:
                ann_mi.highlight_color = annotation['highlight_color']
            if 'highlight_text' in annotation:
                self._log("get_active_annotations() - self.active_annotations[annotation_id]['highlight_text']={0}".format(annotation['highlight_text']))
                ann_mi.highlight_text = annotation['highlight_text']
            if 'note_text' in annotation:
                ann_mi.note_text = annotation['note_text']
            if 'location' in annotation:
                ann_mi.location = annotation['location']
            if 'location_sort' in annotation:
                ann_mi.location_sort = annotation['location_sort']

            # Add annotation to annotations_db
            self.add_to_annotations_db(annotations_db, ann_mi)

            # Increment the progress bar
            self.opts.pb.increment()

            self._log("%s:get_active_annotations() - books_db=%s" % (self.app_name, self.books_db))
            # Update last_annotation in books_db
            self.update_book_last_annotation(self.books_db, ann_mi.last_modification, ann_mi.book_id)

        # Update the timestamp
        self.update_timestamp(annotations_db)
        self.commit()
        self._log_location("Finish!!!!")
コード例 #20
0
    def parse_exported_highlights(self, raw):
        """
        Add the annotations in self.highlights to the currently selected book.

        The "<app_name_>_imported_annotations" and "<app_name_>_imported_books"
        tables are created as needed. The book is the one at the library
        view's current index; only its id is taken from the calibre metadata,
        while title and author are hard-coded sample values. Annotations are
        read from self.highlights, a mapping keyed by timestamp, and processed
        in sorted key order.

        Args:
            raw: The pasted annotations summary. It is not read by this
                implementation; self.highlights must already hold the
                parsed annotations.

        Returns:
            True, unconditionally, once all annotations have been stored and
            the changes committed.

        Construct a BookStruct object with the book's metadata.
        Starred items are minimally required.
           BookStruct properties:
            *active: [True|False]
            *author: "John Smith"
             author_sort: (if known)
            *book_id: an int uniquely identifying the book.
                     Highlights are associated with books through book_id
             genre: "Fiction" (if known)
            *title: "The Story of John Smith"
             title_sort: "Story of John Smith, The" (if known)
             uuid: Calibre's uuid for this book, if known

        Construct an AnnotationStruct object with the
        highlight's metadata. Starred items are minimally required. Dashed items
        (highlight_text and note_text) may be one or both.
          AnnotationStruct properties:
            annotation_id: an int uniquely identifying the annotation
           *book_id: The book this annotation is associated with
            highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
           -highlight_text: A list of paragraphs constituting the highlight
            last_modification: The timestamp of the annotation
            location: location of highlight in the book
           -note_text: A list of paragraphs constituting the note
           *timestamp: Unique timestamp of highlight's creation/modification time

        """
        self._log("%s:parse_exported_highlights()" % self.app_name)

        # Create the annotations, books table as needed
        self.annotations_db = "%s_imported_annotations" % self.app_name_
        self.create_annotations_table(self.annotations_db)
        self.books_db = "%s_imported_books" % self.app_name_
        self.create_books_table(self.books_db)

        self.annotated_book_list = []
        self.selected_books = None

        # Generate the book metadata from the selected book
        row = self.opts.gui.library_view.currentIndex()
        book_id = self.opts.gui.library_view.model().id(row)
        db = self.opts.gui.current_db
        mi = db.get_metadata(book_id, index_is_id=True)

        # Populate author, title at a minimum (hard-coded sample values)
        title = "A Book With Some Exported Annotations"
        author = "John Smith"

        # Populate a BookStruct
        book_mi = BookStruct()
        book_mi.active = True
        book_mi.author = author
        book_mi.book_id = mi.id
        book_mi.title = title
        book_mi.uuid = None
        # Initial value; overwritten below with each annotation's timestamp
        book_mi.last_update = time.mktime(time.localtime())
        book_mi.reader_app = self.app_name
        book_mi.cid = mi.id
        book_mi.annotations = len(self.highlights)

        # Add annotations to the database.
        # sorted() over the mapping yields its keys on both Python 2 and
        # Python 3; dict.iterkeys() is not available on Python 3.
        for timestamp in sorted(self.highlights):
            # Look the entry up once instead of re-indexing for every field
            highlight = self.highlights[timestamp]

            # Keys are visited in ascending order, so after the loop this
            # holds the largest timestamp
            book_mi.last_update = timestamp

            # Populate an AnnotationStruct
            ann_mi = AnnotationStruct()

            # Required items
            ann_mi.book_id = book_mi['book_id']
            ann_mi.last_modification = timestamp

            # Optional items
            if 'annotation_id' in highlight:
                ann_mi.annotation_id = highlight['annotation_id']
            if 'highlight_color' in highlight:
                ann_mi.highlight_color = highlight['highlight_color']
            if 'highlight_text' in highlight:
                # Paragraph lists are flattened to one newline-joined string
                ann_mi.highlight_text = '\n'.join(highlight['highlight_text'])
            if 'note_text' in highlight:
                ann_mi.note_text = '\n'.join(highlight['note_text'])

            # Add annotation to annotations_db
            self.add_to_annotations_db(self.annotations_db, ann_mi)

            # Increment the progress bar
            self.opts.pb.increment()

            # Update last_annotation in books_db
            self.update_book_last_annotation(self.books_db, timestamp, ann_mi.book_id)

        # Add book to books_db
        self.add_to_books_db(self.books_db, book_mi)
        self.annotated_book_list.append(book_mi)

        # Update the timestamp
        self.update_timestamp(self.annotations_db)
        self.update_timestamp(self.books_db)
        self.commit()

        # Return True if successful
        return True