コード例 #1
0
ファイル: InfoQ.py プロジェクト: SkyRaker/calibre-recipes
    def postprocess_html(self, soup, first_fetch):
        """Strip a fetched page down to its main content.

        For an interview page (one containing ``<div id="transcript">``) the
        ``qa`` blocks are re-attached to the ``presentation_full`` div, with
        each question link replaced by a ``<strong>`` element.  For any other
        page the ``text_info`` div is re-attached to ``<div id="content">``.
        In both cases ``<body>`` ends up holding only that single div.

        :param soup: parsed page; it is modified in place.
        :param first_fetch: not used by this method.
        :return: the same ``soup`` object.
        """
        # Drop the <em> inside the author byline, when the page has one.
        author_general = soup.find('span', {'class': 'author_general'})
        if author_general is not None and author_general.em is not None:
            author_general.em.extract()

        transcript_div = soup.find('div', {'id': 'transcript'})
        if transcript_div is not None:  # that's an interview
            # get all <div class="qa" />
            qa_div_list = list(find_by_class(transcript_div, 'div', 'qa'))
            for qa_div in qa_div_list:
                qa_div.extract()

                # replace <a class="question_link">...</a> with <strong>...</strong>
                question_link = qa_div.find('a', {'class': 'question_link'})
                if question_link is None:
                    # Nothing to rewrite in this block; keep it as it is.
                    continue
                question_strong = Tag(soup, 'strong')
                question_strong.append(question_link.string)
                question_link.replaceWith(question_strong)

            content_div = soup.find('div', {'id': 'content'})
            # next() works on both Python 2.6+ and Python 3 iterators.
            full_div = next(find_by_class(content_div, 'div', 'presentation_full'))

            # clean the <h1 />
            full_div.h1.span.extract()
            title_div = full_div.h1.div
            title_div.replaceWith(title_div.string)

            # clear the presentation area (findAll returns a snapshot list)
            for div in full_div.findAll('div'):
                div.extract()

            # add qa list back to presentation area
            for qa_div in qa_div_list:
                full_div.append(qa_div)
        else:
            # text only without title
            text_div = next(find_by_class(soup, 'div', 'text_info'))
            text_div.extract()

            for other in text_div.findAll('div'):
                other.extract()

            # full_div contains title
            full_div = soup.find('div', {'id': 'content'})
            for other in full_div.findAll('div'):
                other.extract()

            full_div.append(text_div)

        # keep full_div in <body /> only
        full_div.extract()

        # Iterate over a copy: extract() removes the node from the body's
        # child list, and mutating that list while iterating it skips every
        # other child.
        for other in list(soup.body):
            other.extract()

        soup.body.append(full_div)

        return soup
コード例 #2
0
 def construct(self, book_notes):
     """
     Render a list of notes as HTML.

     Returns None when book_notes is empty or None.  Otherwise returns a soup
     whose outer <div class="book_notes"> holds one <div class="book_note">
     per note, each wrapping the note in a <p class="book_note"> styled with
     self._get_note_style().
     """
     if not book_notes:
         return None

     container = BeautifulSoup(
         '''<div class="{0}"></div>'''.format('book_notes'))
     for entry in book_notes:
         wrapper = Tag(container, 'div', [('class', "book_note")])
         paragraph_attrs = [
             ('class', "book_note"),
             ('style', "{0}".format(self._get_note_style())),
         ]
         paragraph = Tag(container, 'p', paragraph_attrs)
         paragraph.append(entry)
         wrapper.append(paragraph)
         container.div.append(wrapper)
     return container
コード例 #3
0
 def construct(self, book_notes):
     """
     Build the HTML for a list of notes.

     An empty or None book_notes yields None.  Otherwise the result is a soup
     with a single <div class="book_notes"> that receives, for every note, a
     <div class="book_note"> containing a <p class="book_note"> whose style
     comes from self._get_note_style().
     """
     rendered = None
     if not book_notes:
         return rendered

     outer_markup = '''<div class="{0}"></div>'''.format('book_notes')
     rendered = BeautifulSoup(outer_markup)
     for item in book_notes:
         note_div = Tag(rendered, 'div', [('class', "book_note")])
         style_value = "{0}".format(self._get_note_style())
         note_p = Tag(rendered, 'p',
                      [('class', "book_note"), ('style', style_value)])
         note_p.append(item)
         note_div.append(note_p)
         rendered.div.append(note_div)
     return rendered
コード例 #4
0
    def to_HTML(self, header=''):
        '''
        Generate HTML with user-specified CSS, element order

        Every annotation group in self.annotations (ordered by
        self._annotation_sorter) is rendered as a <div class="annotation">
        and inserted into the first <div> of a soup built from
        ANNOTATIONS_HEADER.  The Text / Note / Timestamp pieces appear in the
        order the stored 'appearance_css' preference lists them.  When
        self.annotations is empty only the header soup is returned.

        header: not used by this method.
        Returns the soup serialized with unicode().
        '''
        # Retrieve CSS prefs
        from calibre_plugins.annotations.appearance import default_elements
        stored_css = plugin_prefs.get('appearance_css', default_elements)

        # Start every style as '' so that a preference list missing one of
        # the three elements cannot leave its style unbound further down.
        styles = {'Note': '', 'Text': '', 'Timestamp': ''}
        elements = []
        for element in stored_css:
            name = element['name']
            elements.append(name)
            if name in styles:
                styles[name] = element['css'].replace('\n', '')
        note_style = styles['Note']
        text_style = styles['Text']
        ts_style = styles['Timestamp']

        # Timestamp row template; whitespace between tags is collapsed.
        ts_css = '''<table cellpadding="0" width="100%" style="{ts_style}" color="{color}">
                        <tr>
                            <td class="location" style="text-align:left">{location}</td>
                            <td class="timestamp" uts="{unix_timestamp}" style="text-align:right">{friendly_timestamp}</td>
                        </tr>
                    </table>'''
        fragments = {
            'Text': '{text}',
            'Note': '{note}',
            'Timestamp': re.sub(r'>\s+<', r'><', ts_css),
        }

        # Order the elements according to stored preferences
        comments_body = ''.join(fragments.get(name, '') for name in elements)

        soup = BeautifulSoup(ANNOTATIONS_HEADER)
        if not self.annotations:
            return unicode(soup)

        # Loop-invariant preferences
        show_hr = plugin_prefs.get('appearance_hr_checkbox', False)
        hr_markup = plugin_prefs.get('HORIZONTAL_RULE', '<hr width="80%" />')
        last_index = len(self.annotations) - 1

        dtc = 0  # insertion index inside soup.div

        # Add the annotations
        for i, agroup in enumerate(
                sorted(self.annotations, key=self._annotation_sorter)):
            location = agroup.location
            if location is None:
                location = ''

            friendly_timestamp = self._timestamp_to_datestr(agroup.timestamp)

            text = ''.join(
                '<p class="highlight" style="{0}">{1}</p>'.format(
                    text_style, agt)
                for agt in (agroup.text or ()))

            note = ''.join(
                '<p class="note" style="{0}">{1}</p>'.format(note_style, agn)
                for agn in (agroup.note or ()))

            try:
                colors = COLOR_MAP[agroup.highlightcolor]
                dt_bgcolor = colors['bg']
                dt_fgcolor = colors['fg']
            except (KeyError, TypeError):
                # Missing or unknown color: log it and use the default pair.
                if agroup.highlightcolor is None:
                    msg = "No highlight color specified, using Default"
                else:
                    msg = "Unknown color '%s' specified" % agroup.highlightcolor
                self._log_location(msg)
                dt_bgcolor = COLOR_MAP['Default']['bg']
                dt_fgcolor = COLOR_MAP['Default']['fg']

            if agroup.hash is not None:
                # Use existing hash when re-rendering
                annotation_hash = agroup.hash
            else:
                m = hashlib.md5()
                m.update(text.encode('utf-8'))
                m.update(note.encode('utf-8'))
                annotation_hash = m.hexdigest()

            try:
                divTag = BeautifulSoup().new_tag('div')
            except Exception:
                # new_tag() is unavailable; fall back to constructing the
                # Tag directly.
                divTag = Tag(BeautifulSoup(), 'div')

            # Only the color prefix is run through format(); the user's CSS
            # is appended verbatim so braces in it cannot break rendering.
            datetime_style = "background-color:{0};color:{1};".format(
                dt_bgcolor, dt_fgcolor) + ts_style

            content_args = {
                'color': agroup.highlightcolor,
                'friendly_timestamp': friendly_timestamp,
                'location': location,
                'note': note,
                'text': text,
                'ts_style': datetime_style,
                'unix_timestamp': agroup.timestamp,
            }
            comments_body_soup = BeautifulSoup(
                comments_body.format(**content_args))

            # append() re-parents the node, so contents[0] is a new node on
            # every pass until the body is empty.
            body = comments_body_soup.body
            while body is not None and len(body.contents) > 0:
                divTag.append(body.contents[0])

            divTag['class'] = "annotation"
            divTag['genre'] = ''
            if agroup.genre:
                divTag['genre'] = escape(agroup.genre)
            divTag['hash'] = annotation_hash
            divTag['location_sort'] = agroup.location_sort
            divTag['reader'] = agroup.reader_app
            divTag['style'] = ANNOTATION_DIV_STYLE

            soup.div.insert(dtc, divTag)
            dtc += 1

            # Optional rule between annotations (never after the last one).
            # A fresh soup is parsed each time because inserting a node
            # moves it.
            if show_hr and i < last_index:
                soup.div.insert(dtc, BeautifulSoup(hr_markup))
                dtc += 1

        return unicode(soup)
コード例 #5
0
ファイル: InfoQ.py プロジェクト: xuxiandi/calibre-recipes
    def postprocess_html(self, soup, first_fetch):
        """Strip a fetched page down to a navigation div plus its main content.

        For an interview page (one containing ``<div id="transcript">``) the
        ``qa`` blocks are re-attached to the ``presentation_full`` div, with
        each question link replaced by a ``<strong>`` element.  For any other
        page the ``text_info`` div is re-attached to ``<div id="content">``.
        Afterwards ``<body>`` is emptied and refilled with the first ``<div>``
        that was left in the body, followed by the content div.

        :param soup: parsed page; it is modified in place.
        :param first_fetch: not used by this method.
        :return: the same ``soup`` object.
        """
        # Drop the <em> inside the author byline, when the page has one.
        author_general = soup.find('span', {'class': 'author_general'})
        if author_general is not None and author_general.em is not None:
            author_general.em.extract()

        transcript_div = soup.find('div', {'id': 'transcript'})
        if transcript_div is not None:  # that's an interview
            # get all <div class="qa" />
            qa_div_list = list(find_by_class(transcript_div, 'div', 'qa'))
            for qa_div in qa_div_list:
                qa_div.extract()

                # replace <a class="question_link">...</a> with <strong>...</strong>
                question_link = qa_div.find('a', {'class': 'question_link'})
                if question_link is None:
                    # Nothing to rewrite in this block; keep it as it is.
                    continue
                question_strong = Tag(soup, 'strong')
                question_strong.append(question_link.string)
                question_link.replaceWith(question_strong)

            content_div = soup.find('div', {'id': 'content'})
            # next() works on both Python 2.6+ and Python 3 iterators.
            full_div = next(find_by_class(content_div, 'div', 'presentation_full'))

            # clean the <h1 />
            full_div.h1.span.extract()
            title_div = full_div.h1.div
            title_div.replaceWith(title_div.string)

            # clear the presentation area (findAll returns a snapshot list)
            for div in full_div.findAll('div'):
                div.extract()

            # add qa list back to presentation area
            for qa_div in qa_div_list:
                full_div.append(qa_div)
        else:
            # text only without title
            text_div = next(find_by_class(soup, 'div', 'text_info'))
            text_div.extract()

            for other in text_div.findAll('div'):
                other.extract()

            # full_div contains title
            full_div = soup.find('div', {'id': 'content'})
            for other in full_div.findAll('div'):
                other.extract()

            full_div.append(text_div)

        full_div.extract()

        # First <div> still present in <body>; may be absent on some pages.
        nav_div = soup.body.div
        if nav_div is not None:
            nav_div.extract()

        # keep nav_div and full_div in <body /> only.
        # Iterate over a copy: extract() removes the node from the body's
        # child list, and mutating that list while iterating it skips every
        # other child.
        for other in list(soup.body):
            other.extract()

        if nav_div is not None:
            soup.body.append(nav_div)
        soup.body.append(full_div)

        return soup
コード例 #6
0
def merge_annotations(parent, cid, old_soup, new_soup):
    '''
    old_soup, new_soup: BeautifulSoup()
    Merge the stored annotations in old_soup with the device annotations in
    new_soup, matching them by the 'uts' attribute of their timestamp cell.

    For each timestamp:
      - stored only, or stored hash == device hash: the stored annotation is
        used (re-rendered with current CSS when old_soup has a
        'user_annotations' div, since the user may have edited it)
      - device only, or hashes differ: the device annotation is used

    The merged set is passed through sort_merged_annotations() and replaces
    the 'user_annotations' div of new_soup.  Both soups are modified in place.

    Returns new_soup serialized with unicode().
    '''
    TRANSIENT_DB = 'transient'

    # uts -> {'stored_hash': ..., 'device_hash': ...} (either key may be absent)
    timestamps = {}

    # Get the timestamps and hashes of the stored annotations
    for sua in old_soup.findAll('div', 'annotation'):
        timestamp = sua.find('td', 'timestamp')['uts']
        timestamps[timestamp] = {'stored_hash': sua['hash']}

    # Where stored annotations are looked up.  Stays old_soup when there is
    # no 'user_annotations' container to re-render, so the lookup below never
    # hits an unbound name.
    stored_source = old_soup

    # Rerender stored annotations
    ouas = old_soup.find('div', 'user_annotations')
    if ouas is not None:
        ouas.extract()

        # Capture existing annotations
        parent.opts.db.capture_content(ouas, cid, TRANSIENT_DB)

        # Regurgitate annotations with current CSS
        rerendered_annotations = parent.opts.db.rerender_to_html(
            TRANSIENT_DB, cid)
        stored_source = BeautifulSoup(rerendered_annotations)

    # Add device annotation timestamps and hashes
    for dua in new_soup.findAll('div', 'annotation'):
        timestamp = dua.find('td', 'timestamp')['uts']
        timestamps.setdefault(timestamp, {})['device_hash'] = dua['hash']

    merged_annotations = Tag(BeautifulSoup(), 'div',
                             [('class', "user_annotations"),
                              ('style', 'margin:0')])

    for ts in sorted(timestamps):
        hashes = timestamps[ts]
        has_stored = 'stored_hash' in hashes
        has_device = 'device_hash' in hashes

        if has_stored and (not has_device or
                           hashes['stored_hash'] == hashes['device_hash']):
            # Stored only, or stored matches device - prefer the stored copy
            annotation = stored_source.find(
                'div', {'hash': hashes['stored_hash']})
        else:
            # Device only, or device updated since initial capture
            annotation = new_soup.find(
                'div', {'hash': hashes['device_hash']})

        # find() returns None when the hash is absent; skip rather than
        # appending None to the merged container.
        if annotation is not None:
            merged_annotations.append(annotation)

    merged_annotations = sort_merged_annotations(merged_annotations)

    # Update new_soup with merged_annotations
    new_soup_uas = new_soup.find('div', 'user_annotations')
    if new_soup_uas is not None:
        new_soup_uas.replaceWith(merged_annotations)
    else:
        # No container to replace; attach the merged set so it is not lost.
        new_soup.append(merged_annotations)

    return unicode(new_soup)
コード例 #7
0
def merge_annotations(parent, cid, old_soup, new_soup):
    '''
    old_soup, new_soup: BeautifulSoup()
    Merge the stored annotations in old_soup with the device annotations in
    new_soup, matching them by the 'uts' attribute of their timestamp cell.

    For each timestamp:
      - stored only, or stored hash == device hash: the stored annotation is
        used (re-rendered with current CSS when old_soup has a
        'user_annotations' div, since the user may have edited it)
      - device only, or hashes differ: the device annotation is used

    The merged set is passed through sort_merged_annotations() and replaces
    the 'user_annotations' div of new_soup.  Both soups are modified in place.

    Returns new_soup serialized with unicode().
    '''
    TRANSIENT_DB = 'transient'

    # uts -> {'stored_hash': ..., 'device_hash': ...} (either key may be absent)
    timestamps = {}

    # Get the timestamps and hashes of the stored annotations
    for sua in old_soup.findAll('div', 'annotation'):
        timestamp = sua.find('td', 'timestamp')['uts']
        timestamps[timestamp] = {'stored_hash': sua['hash']}

    # Where stored annotations are looked up.  Stays old_soup when there is
    # no 'user_annotations' container to re-render, so the lookup below never
    # hits an unbound name.
    stored_source = old_soup

    # Rerender stored annotations
    ouas = old_soup.find('div', 'user_annotations')
    if ouas is not None:
        ouas.extract()

        # Capture existing annotations
        parent.opts.db.capture_content(ouas, cid, TRANSIENT_DB)

        # Regurgitate annotations with current CSS
        rerendered_annotations = parent.opts.db.rerender_to_html(TRANSIENT_DB, cid)
        stored_source = BeautifulSoup(rerendered_annotations)

    # Add device annotation timestamps and hashes
    for dua in new_soup.findAll('div', 'annotation'):
        timestamp = dua.find('td', 'timestamp')['uts']
        timestamps.setdefault(timestamp, {})['device_hash'] = dua['hash']

    merged_annotations = Tag(BeautifulSoup(), 'div',
        [('class', "user_annotations"), ('style','margin:0')])

    for ts in sorted(timestamps):
        hashes = timestamps[ts]
        has_stored = 'stored_hash' in hashes
        has_device = 'device_hash' in hashes

        if has_stored and (not has_device or
                           hashes['stored_hash'] == hashes['device_hash']):
            # Stored only, or stored matches device - prefer the stored copy
            annotation = stored_source.find('div', {'hash': hashes['stored_hash']})
        else:
            # Device only, or device updated since initial capture
            annotation = new_soup.find('div', {'hash': hashes['device_hash']})

        # find() returns None when the hash is absent; skip rather than
        # appending None to the merged container.
        if annotation is not None:
            merged_annotations.append(annotation)

    merged_annotations = sort_merged_annotations(merged_annotations)

    # Update new_soup with merged_annotations
    new_soup_uas = new_soup.find('div', 'user_annotations')
    if new_soup_uas is not None:
        new_soup_uas.replaceWith(merged_annotations)
    else:
        # No container to replace; attach the merged set so it is not lost.
        new_soup.append(merged_annotations)

    return unicode(new_soup)