Ejemplo n.º 1
0
def dgu_linked_user(user, maxlength=16):  # Overwrite h.linked_user
    """Render a user reference, showing less detail to non-officials.

    :param user: a ``model.User``, a value ``model.User.get`` can look up,
        one of the pseudo-user constants, or a legacy string such as
        "NHS North Staffordshire (uid 6107 )".
    :param maxlength: length limit passed to ``truncate`` for displayed text.
    :returns: the pseudo-user value unchanged; for officials a link to the
        user's page (or the truncated raw name when no user is found); for
        everyone else links to the user's publishers, or the text
        "System Administrator" / "Staff".
    """
    from ckan import model
    from ckan.lib.base import h
    from ckanext.dgu.plugins_toolkit import c

    if user in [model.PSEUDO_USER__LOGGED_IN, model.PSEUDO_USER__VISITOR]:
        return user
    if not isinstance(user, model.User):
        user_name = unicode(user)
        user = model.User.get(user_name)
    if not user:
        # may be in the format "NHS North Staffordshire (uid 6107 )"
        # Raw string: "\(" and "\d" are regex escapes, not string escapes.
        match = re.match(r".*\(uid (\d+)\s?\)", user_name)
        if match:
            drupal_user_id = match.group(1)
            user = model.User.get("user_d%s" % drupal_user_id)

    if c.is_an_official:
        # only officials can see the actual user name
        if user:
            link_text = truncate(user.fullname or user.name, length=maxlength)
            return h.link_to(link_text, h.url_for(controller="user", action="read", id=user.name))
        else:
            return truncate(user_name, length=maxlength)
    else:
        # joe public just gets a link to the user's publisher(s)
        import ckan.authz

        if user:
            groups = user.get_groups("publisher")
            if groups:
                return h.literal(
                    " ".join(
                        [
                            h.link_to(truncate(group.title, length=maxlength), "/publisher/%s" % group.name)
                            for group in groups
                        ]
                    )
                )
            elif ckan.authz.Authorizer().is_sysadmin(user):
                return "System Administrator"
            else:
                return "Staff"
        else:
            return "Staff"
Ejemplo n.º 2
0
def truncate_html(*args):
    """Truncate markup with ``truncate`` and re-serialise it through html5lib.

    All positional arguments are forwarded unchanged to ``truncate``. The
    result is parsed into a DOM tree and the first child node of the first
    ``body`` element is returned as an XML string.
    """
    truncated = truncate(*args)
    dom_builder = treebuilders.getTreeBuilder("dom")
    dom = html5lib.HTMLParser(tree=dom_builder).parse(truncated)

    body = dom.getElementsByTagName('body')[0]
    # Only the first node directly under <body> is serialised.
    return body.childNodes[0].toxml()
Ejemplo n.º 3
0
 def _get_text(self):
     if self._text is None:
         text = markdown_to_plain_text(self.event.text(),
                                       safe_mode='remove')
         self._text = truncate(text, length=160,
                               indicator="...", whole_word=True)
     return self._text
Ejemplo n.º 4
0
def latest_post():
    '''Fetch the newest visible blog post, prepared for display.

    Two attributes are set on the returned post: ``content_markdown`` (the
    content truncated with length 320 and passed through ``markdown``) and
    ``post_author`` (the ``model.User`` looked up from ``post.user_id``).

    :returns: the post, or None when there is no visible post
    :rtype: ckanext.sweden.blog.model.post.Post or None

    '''
    try:
        from ckanext.sweden.blog.model.post import Post
        visible_posts = Session.query(Post).filter(Post.visible == True)
        post = visible_posts.order_by('created desc').first()
    except NoResultFound:
        return None

    if post is None:
        return None

    excerpt = truncate(post.content, length=320, indicator='...',
                       whole_word=True)
    post.content_markdown = markdown(unicode(excerpt))

    # Try model.User.get first, then fall back to a query on the id column.
    author = model.User.get(post.user_id)
    if not author:
        author = Session.query(model.User).filter_by(
            id=post.user_id).first()
    post.post_author = author

    return post
Ejemplo n.º 5
0
def _prepare_resource_url(res):
    """Convert list of resources to files_list for data.world.
    """
    link = res['url'] or ''
    name = res['name'] or ''
    link_name, link_ext = os.path.splitext(os.path.basename(link))
    file_name, file_ext = os.path.splitext(os.path.basename(name))

    existing_format = res.get('format')
    if existing_format:
        ext = '.' + existing_format.lower()
    elif file_ext:
        ext = file_ext
    else:
        ext = link_ext.split('#').pop(0).split('?').pop(0)

    prepared_data = dict(name=(file_name or link_name) + ext,
                         source=dict(url=link))
    description = res.get('description', '')

    if description:

        prepared_data['description'] = truncate(description,
                                                120,
                                                whole_word=True)

    return prepared_data
Ejemplo n.º 6
0
def truncate_xhtml(string, size, _strip_xhtml=False, _decode_entities=False):
    """Shorten an XHTML string to roughly ``size`` characters (whole words).

    :param string: XHTML
    :type string: unicode
    :param size: Max length
    :param _strip_xhtml: Flag to strip out all XHTML before measuring
    :param _decode_entities: When false, a stripped *and* truncated result
        has its entities re-encoded; otherwise they are left decoded
    :rtype: unicode
    """
    if not string:
        return u''

    if _strip_xhtml:
        # Pad block elements with a trailing space so the text of adjacent
        # blocks does not run together once the tags are removed.
        string = strip_xhtml(block_spaces.sub(u"\\1 ", string))

    string = decode_entities(string)

    if len(string) <= size:
        return string.strip()

    string = text.truncate(string, length=size, whole_word=True)
    if not _strip_xhtml:
        # Markup is still present, so run the truncated text through clean().
        string = clean(string, **cleaner_settings)
    elif not _decode_entities:
        # re-encode the entities, if we have to.
        string = encode_entities(string)

    return string.strip()
Ejemplo n.º 7
0
def markdown_extract(text):
    """Return a short plain-text extract of markdown text.

    The markdown is rendered, parsed with ``fromstring`` and reduced to its
    XPath string value, then truncated (length 190, whole words, "..."
    indicator). Returns '' for None or an empty string.
    """
    if text is None or text == '':
        return ''
    rendered = markdown(text)
    plain = fromstring(rendered).xpath("string()")
    extract = truncate(plain, length=190, indicator='...', whole_word=True)
    return unicode(extract)
Ejemplo n.º 8
0
def truncate_xhtml(string, size, _strip_xhtml=False, _decode_entities=False):
    """Cut an XHTML string down to about ``size`` characters at a word break.

    :param string: XHTML
    :type string: unicode
    :param size: Max length
    :param _strip_xhtml: Flag to strip out all XHTML
    :param _decode_entities: Flag to keep entities decoded in a stripped,
        truncated result (they are re-encoded when this is false)
    :rtype: unicode
    """
    if not string:
        return u''

    result = string
    if _strip_xhtml:
        # Insert whitespace after block elements.
        # So they are separated when we strip the xhtml.
        result = block_spaces.sub(u"\\1 ", result)
        result = strip_xhtml(result)

    result = decode_entities(result)

    too_long = len(result) > size
    if too_long:
        result = text.truncate(result, length=size, whole_word=True)
    if too_long and _strip_xhtml and not _decode_entities:
        # re-encode the entities, if we have to.
        result = encode_entities(result)
    if too_long and not _strip_xhtml:
        result = clean(result, **cleaner_settings)

    return result.strip()
Ejemplo n.º 9
0
def truncate_html(*args):
    """Run ``truncate`` on the arguments and return well-formed markup.

    The truncated text is parsed by an html5lib parser using the "dom" tree
    builder; the XML of the first child of the first ``body`` element is
    returned.
    """
    shortened = truncate(*args)
    tree_builder = treebuilders.getTreeBuilder("dom")
    parser = html5lib.HTMLParser(tree=tree_builder)
    parsed = parser.parse(shortened)

    first_body = parsed.getElementsByTagName("body")[0]
    first_node = first_body.childNodes[0]
    return first_node.toxml()
Ejemplo n.º 10
0
def markdown_preview(text, length=140):
    """Return the text content of rendered markdown, optionally truncated.

    :param text: markdown source; a falsy value yields "".
    :param length: truncation length (whole words); a falsy value disables
        truncation.
    """
    if not text:
        return ""
    rendered = unicode(markdown(text))
    preview = html.fromstring(rendered).text_content()
    if not length:
        return preview
    return truncate(preview, length=length, whole_word=True)
Ejemplo n.º 11
0
def markdown_preview(text, length=140):
    """Produce a plain-text preview of markdown source.

    The markdown is rendered, parsed with ``html.fromstring`` and reduced to
    its text content. When ``length`` is truthy the text is truncated to
    that length on a word boundary. Falsy ``text`` returns ''.
    """
    if not text:
        return ''
    document = html.fromstring(unicode(markdown(text)))
    plain = document.text_content()
    return truncate(plain, length=length, whole_word=True) if length else plain
Ejemplo n.º 12
0
 def _get_text(self):
     if self._text is None:
         text = markdown_to_plain_text(self.event.text(),
                                       safe_mode='remove')
         self._text = truncate(text,
                               length=160,
                               indicator="...",
                               whole_word=True)
     return self._text
Ejemplo n.º 13
0
 def content_short(self):
     """
     Return ``self.content`` with HTML tags stripped, surrounding whitespace
     removed and truncated (length 300, "..." indicator, whole words).

     AllanC TODO: Derived field - Postgres trigger needed
     """
     from cbutils.text import strip_html_tags
     plain = strip_html_tags(self.content).strip()
     return truncate(plain, length=300, indicator='...', whole_word=True)
Ejemplo n.º 14
0
def markdown_extract(text, extract_length=190):
    """Return a truncated plain-text extract of markdown text.

    The markdown is rendered and anything matching ``<.*?>`` is removed
    before truncating to ``extract_length`` on a word boundary. None or
    whitespace-only input yields ''.
    """
    if text is None or not text.strip():
        return ''
    rendered = markdown(text)
    plain = re.sub(r'<.*?>', '', rendered)
    extract = truncate(plain, length=extract_length, indicator='...',
                       whole_word=True)
    return unicode(extract)
Ejemplo n.º 15
0
def markdown_extract_filter(source, extract_length=190):
    """Return a ``Markup`` plain-text extract of markdown ``source``.

    The rendered markdown is stripped of all tags with ``bleach.clean``.
    The text is returned whole when ``extract_length`` is falsy or the text
    is shorter than it; otherwise it is truncated on a word boundary.
    Empty or whitespace-only input yields ''.
    """
    from ckan.lib.helpers import markdown
    if not (source and source.strip()):
        return ''

    plain = bleach.clean(markdown(source), tags=[], strip=True)

    fits = not extract_length or len(plain) < extract_length
    if fits:
        return Markup(plain)
    shortened = truncate(plain, length=extract_length, indicator='...', whole_word=True)
    return Markup(unicode(shortened))
Ejemplo n.º 16
0
def markdown_extract(text, extract_length=190):
    """Return the plain text representation of markdown encoded text, that
    is, the text without any html tags.  If extract_length is 0 the text is
    not truncated.  None or whitespace-only input yields ""."""
    if text is None or not text.strip():
        return ""
    plain = re.sub(r"<.*?>", "", markdown(text))
    needs_truncation = bool(extract_length) and len(plain) >= extract_length
    if not needs_truncation:
        return plain
    shortened = truncate(plain, length=extract_length, indicator="...", whole_word=True)
    return literal(unicode(shortened))
Ejemplo n.º 17
0
def dgu_linked_user(user, maxlength=16):  # Overwrite h.linked_user
    """Return display markup for a user, tailored to the current viewer.

    :param user: a ``model.User``, a value accepted by ``model.User.get``,
        a pseudo-user constant, or a legacy string of the form
        "NHS North Staffordshire (uid 6107 )".
    :param maxlength: length limit handed to ``truncate`` for visible text.
    :returns: pseudo-users are returned as-is. When ``c.is_an_official`` is
        set, a link to the user's page (or the truncated raw name if the
        user cannot be found). Otherwise links to the user's publishers, or
        'System Administrator' / 'Staff'.
    """
    from ckan import model
    from ckan.lib.base import h
    from ckanext.dgu.plugins_toolkit import c

    if user in [model.PSEUDO_USER__LOGGED_IN, model.PSEUDO_USER__VISITOR]:
        return user
    if not isinstance(user, model.User):
        user_name = unicode(user)
        user = model.User.get(user_name)
    if not user:
        # may be in the format "NHS North Staffordshire (uid 6107 )"
        # Raw string so the regex escapes are not read as string escapes.
        match = re.match(r'.*\(uid (\d+)\s?\)', user_name)
        if match:
            drupal_user_id = match.group(1)
            user = model.User.get('user_d%s' % drupal_user_id)

    if c.is_an_official:
        # only officials can see the actual user name
        if user:
            link_text = truncate(user.fullname or user.name, length=maxlength)
            return h.link_to(link_text,
                             h.url_for(controller='user', action='read', id=user.name))
        else:
            return truncate(user_name, length=maxlength)
    else:
        # joe public just gets a link to the user's publisher(s)
        import ckan.authz
        if user:
            groups = user.get_groups('publisher')
            if groups:
                publisher_links = [
                    h.link_to(truncate(group.title, length=maxlength),
                              '/publisher/%s' % group.name)
                    for group in groups
                ]
                return h.literal(' '.join(publisher_links))
            elif ckan.authz.Authorizer().is_sysadmin(user):
                return 'System Administrator'
            else:
                return 'Staff'
        else:
            return 'Staff'
Ejemplo n.º 18
0
def markdown_preview(text, length=150):
    """Return a single-line plain-text preview of markdown source.

    The markdown is rendered and reduced to its text content on a
    best-effort basis: if rendering or parsing fails, the raw ``text`` is
    used instead.

    :param text: markdown source; a falsy value yields ''.
    :param length: truncation length (whole words); falsy disables it.
    :returns: the preview with newlines replaced by spaces.
    """
    if not text:
        return ''
    try:
        md = html.fromstring(unicode(markdown(text)))
        text = md.text_content()
    except Exception:
        # Best effort: fall back to the raw text. Catching Exception rather
        # than using a bare except lets KeyboardInterrupt/SystemExit through.
        pass
    if length:
        text = truncate(text, length=length, whole_word=True)
    return text.replace('\n', ' ')
Ejemplo n.º 19
0
def markdown_preview(text, length=150):
    """Build a one-line plain-text preview from markdown source.

    Rendering and parsing are attempted first; when they raise, the
    original ``text`` is previewed unchanged.

    :param text: markdown source; a falsy value yields ''.
    :param length: truncation length (whole words); falsy disables it.
    :returns: the preview text with every newline replaced by a space.
    """
    if not text:
        return ''
    try:
        md = html.fromstring(unicode(markdown(text)))
        text = md.text_content()
    except Exception:
        # Deliberate fallback to the raw text. A bare except here would also
        # trap KeyboardInterrupt and SystemExit, so only Exception is caught.
        pass
    if length:
        text = truncate(text, length=length, whole_word=True)
    return text.replace('\n', ' ')
Ejemplo n.º 20
0
def truncate(string, size, whole_word=True):
    """Shorten a plaintext string to about ``size`` characters.

    Thin wrapper that delegates to ``text.truncate``.

    :param string: plaintext
    :type string: unicode
    :param size: Max length
    :param whole_word: Whether to prefer cutting at the end of a word.
        Defaults to True.
    :rtype: unicode
    """
    shortened = text.truncate(string, size, whole_word=whole_word)
    return shortened
Ejemplo n.º 21
0
def truncate(string, size, whole_word=True):
    """Cut a plaintext string down to roughly the given size.

    Delegates to ``text.truncate``, forwarding ``whole_word`` by keyword.

    :param string: plaintext
    :type string: unicode
    :param size: Max length
    :param whole_word: Prefer breaking at a word end when true (default).
    :rtype: unicode
    """
    options = dict(whole_word=whole_word)
    return text.truncate(string, size, **options)
Ejemplo n.º 22
0
 def lines(self):
     """Yield ``self.text`` line by line, wrapping lines at LINE_LENGTH.

     Each input line is repeatedly split with ``truncate`` (whole words, no
     indicator) while its right-stripped length exceeds
     ``self.LINE_LENGTH``; the remainder is yielded last. Yields nothing
     when ``self.text`` is None.
     """
     from webhelpers.text import truncate
     if self.text is None:
         return
     for remainder in self.text.strip().split("\n"):
         while len(remainder.rstrip()) > self.LINE_LENGTH:
             chunk = truncate(remainder, length=self.LINE_LENGTH,
                              indicator='', whole_word=True)
             # Continue with what is left, minus leading whitespace.
             remainder = remainder[len(chunk):].lstrip()
             yield chunk
         yield remainder
Ejemplo n.º 23
0
 def json(self):
     """Return a dict describing this message for the recent-messages JSON.

     ``style`` is derived from the scan flags with this precedence (highest
     first): not scanned, infected, blacklisted, whitelisted, high spam,
     spam, otherwise 'white'. ``status`` is the translated short label with
     precedence: not scanned, whitelisted, clean, infected, blacklisted,
     spam.
     """
     infected = (self.nameinfected or self.virusinfected
                 or self.otherinfected)

     if not self.scaned:
         value = 'gray'
     elif infected:
         value = 'infected'
     elif self.blacklisted:
         value = 'blacklisted'
     elif self.whitelisted:
         value = 'whitelisted'
     elif self.highspam:
         value = 'highspam'
     elif self.spam:
         value = 'spam'
     else:
         value = 'white'

     if not self.scaned:
         status = _('NS')
     elif self.whitelisted:
         status = _('AS')
     elif not self.spam and not infected:
         status = _('Clean')
     elif infected:
         status = _('Infected')
     elif self.blacklisted:
         status = _('BS')
     else:
         # Only spam that is neither infected nor blacklisted gets here.
         status = _('Spam')

     subject = (self.subject and self.subject.strip()) or '---'
     return dict(
                 id=self.id,
                 timestamp=self.timestamp.strftime('%Y-%m-%d %H:%M:%S %Z'),
                 sascore=self.sascore,
                 size=format_byte_size(self.size),
                 subject=escape(truncate(subject, 50)),
                 from_address=escape(
                     wrap_paragraphs(self.from_address, 32)),
                 to_address=escape(wrap_paragraphs(self.to_address
                                                     or '---', 32)),
                 style=value,
                 status=status,
             )
Ejemplo n.º 24
0
def jsonify_msg_list(element):
    """
    Convert a message row into a dict of JSON-serialisable values.

    The timestamp is formatted as a string, the size is formatted with
    ``format_byte_size`` and the subject and addresses are escaped.
    ``style`` and ``status`` are derived from the scan flags; later checks
    below override earlier ones.

    The subject is truncated *before* it is escaped so that the cut can
    never land in the middle of an HTML entity; a missing subject is
    rendered as an empty string.
    """
    value = 'white'
    if (element.spam and not element.highspam and not element.blacklisted
        and not element.nameinfected and not element.otherinfected
        and not element.virusinfected):
        value = 'spam'
    if element.highspam and (not element.blacklisted):
        value = 'highspam'
    if element.whitelisted:
        value = 'whitelisted'
    if element.blacklisted:
        value = 'blacklisted'
    if (element.nameinfected or element.virusinfected or
        element.otherinfected):
        value = 'infected'
    if not element.scaned:
        value = 'gray'
    if (element.spam and (not element.blacklisted)
        and (not element.virusinfected)
        and (not element.nameinfected)
        and (not element.otherinfected)):
        status = _('Spam')
    if element.blacklisted:
        status = _('BL')
    if (element.virusinfected or
           element.nameinfected or
           element.otherinfected):
        status = _('Infected')
    if ((not element.spam) and (not element.virusinfected)
           and (not element.nameinfected)
           and (not element.otherinfected)
           and (not element.whitelisted)):
        status = _('Clean')
    if element.whitelisted:
        status = _('WL')
    if not element.scaned:
        status = _('NS')
    return dict(
                id=element.id,
                timestamp=element.timestamp.strftime('%A, %d %b %Y %H:%M:%S %Z'),
                sascore=element.sascore,
                size=format_byte_size(element.size),
                # Truncate the raw text first, then escape the result.
                subject=escape(truncate(element.subject or '', 50)),
                from_address=wrap_paragraphs(escape(element.from_address), 32),
                to_address=wrap_paragraphs(escape(element.to_address), 32),
                style=value,
                status=status,
            )
def report():
    """Print a contact/FOI summary line for every top level publisher.

    Publishers that belong to another 'publisher' group are skipped. After
    the per-publisher lines, totals are printed for the publishers seen and
    for those lacking contact or FOI details.
    """
    # report on top level publishers
    from ckan import model
    log = global_log
    log.info('Summary of top level publishers:')
    publishers = 0
    without_contact = 0
    without_foi = 0
    for publisher in model.Group.all('publisher'):
        if publisher.get_groups('publisher'):
            # Has a parent group, so it is not top level.
            continue
        extras = publisher.extras
        contact_details = extras['contact-email'] or extras['contact-phone']
        foi_details = extras['foi-email'] or extras['foi-phone']
        summary = (publisher.title,
                   truncate(contact_details, 15) or 'NONE',
                   truncate(foi_details, 15) or 'NONE')
        print('%s: Contact: %s Foi: %s' % summary)
        publishers += 1
        if not contact_details:
            without_contact += 1
        if not foi_details:
            without_foi += 1
    print('Total top level publishers: %i' % publishers)
    print('Total without contact details: %i' % without_contact)
    print('Total without FOI details: %i' % without_foi)
Ejemplo n.º 26
0
def jsonify_msg_list(element):
    """
    Build a dict of JSON-friendly values from a message row.

    The timestamp is rendered with ``strftime``, the size with
    ``format_byte_size``, and subject and addresses are escaped. ``style``
    and ``status`` come from the scan flags, with later checks overriding
    earlier ones.

    The subject is shortened first and escaped afterwards, so truncation
    cannot split an HTML entity; a missing subject becomes an empty string.
    """
    value = 'white'
    if (element.spam and not element.highspam and not element.blacklisted
        and not element.nameinfected and not element.otherinfected
        and not element.virusinfected):
        value = 'spam'
    if element.highspam and (not element.blacklisted):
        value = 'highspam'
    if element.whitelisted:
        value = 'whitelisted'
    if element.blacklisted:
        value = 'blacklisted'
    if (element.nameinfected or element.virusinfected or
        element.otherinfected):
        value = 'infected'
    if not element.scaned:
        value = 'gray'
    if (element.spam and (not element.blacklisted)
        and (not element.virusinfected)
        and (not element.nameinfected)
        and (not element.otherinfected)):
        status = _('Spam')
    if element.blacklisted:
        status = _('BL')
    if (element.virusinfected or
           element.nameinfected or
           element.otherinfected):
        status = _('Infected')
    if ((not element.spam) and (not element.virusinfected)
           and (not element.nameinfected)
           and (not element.otherinfected)
           and (not element.whitelisted)):
        status = _('Clean')
    if element.whitelisted:
        status = _('WL')
    if not element.scaned:
        status = _('NS')
    # Shorten the raw subject before escaping it.
    short_subject = truncate(element.subject or '', 50)
    return dict(
            id=element.id,
            timestamp=element.timestamp.strftime('%Y-%m-%d %H:%M:%S %Z'),
            sascore=element.sascore,
            size=format_byte_size(element.size),
            subject=escape(short_subject),
            from_address=wrap_paragraphs(escape(element.from_address), 32),
            to_address=wrap_paragraphs(escape(element.to_address), 32),
            style=value,
            status=status)
Ejemplo n.º 27
0
 def lines(self):
     """Generate the wrapped lines of ``self.text``.

     The stripped text is split on newlines. Any line whose right-stripped
     length exceeds ``self.LINE_LENGTH`` is broken into pieces with
     ``truncate`` (whole words, empty indicator) and each piece is yielded,
     followed by whatever is left. Nothing is yielded for a None text.
     """
     from webhelpers.text import truncate
     if self.text is None:
         return
     stripped = self.text.strip()
     for line in stripped.split("\n"):
         while True:
             if len(line.rstrip()) <= self.LINE_LENGTH:
                 break
             part = truncate(line,
                             length=self.LINE_LENGTH,
                             indicator='',
                             whole_word=True)
             rest = line[len(part):]
             line = rest.lstrip()
             yield part
         yield line
def report():
    """Print contact and FOI details for each top level organization.

    Groups of type 'organization' that themselves belong to an
    'organization' group are skipped. Totals for the groups listed, and for
    those without contact or FOI details, are printed at the end.
    """
    # report on top level publishers
    from ckan import model
    log = global_log
    log.info('Summary of top level publishers:')
    counts = {'publishers': 0, 'no_contact': 0, 'no_foi': 0}
    for publisher in model.Group.all('organization'):
        parents = publisher.get_groups('organization')
        if not parents:
            group_extras = publisher.extras
            contact_details = (group_extras['contact-email']
                               or group_extras['contact-phone'])
            foi_details = (group_extras['foi-email']
                           or group_extras['foi-phone'])
            contact_text = truncate(contact_details, 15) or 'NONE'
            foi_text = truncate(foi_details, 15) or 'NONE'
            print('%s: Contact: %s Foi: %s' % (
                publisher.title, contact_text, foi_text))
            counts['publishers'] += 1
            counts['no_contact'] += 0 if contact_details else 1
            counts['no_foi'] += 0 if foi_details else 1
    print('Total top level publishers: %i' % counts['publishers'])
    print('Total without contact details: %i' % counts['no_contact'])
    print('Total without FOI details: %i' % counts['no_foi'])
Ejemplo n.º 29
0
 def json(self):
     """Return the dict used for the recent-messages JSON listing.

     ``style`` and ``status`` are both derived from the scan flags; the
     checks run in sequence and a later match overrides an earlier one.
     """
     infected = (self.nameinfected or self.virusinfected
                 or self.otherinfected)

     value = 'white'
     if self.spam and not (self.highspam or self.blacklisted or infected):
         value = 'spam'
     if self.highspam and not self.blacklisted:
         value = 'highspam'
     if self.whitelisted:
         value = 'whitelisted'
     if self.blacklisted:
         value = 'blacklisted'
     if infected:
         value = 'infected'
     if not self.scaned:
         value = 'gray'

     if self.spam and not (self.blacklisted or infected):
         status = _('Spam')
     if self.blacklisted:
         status = _('BS')
     if infected:
         status = _('Infected')
     if not (self.spam or infected or self.whitelisted):
         status = _('Clean')
     if self.whitelisted:
         status = _('AS')
     if not self.scaned:
         status = _('NS')

     subject = (self.subject and self.subject.strip()) or '---'
     recipient = self.to_address or '---'
     return dict(
                 id=self.id,
                 timestamp=str(self.timestamp),
                 sascore=self.sascore,
                 size=format_byte_size(self.size),
                 subject=escape(truncate(subject, 50)),
                 from_address=escape(wrap_paragraphs(self.from_address, 32)),
                 to_address=escape(wrap_paragraphs(recipient, 32)),
                 style=value,
                 status=status,
             )
Ejemplo n.º 30
0
    def sync_list(self, offset=0, step=FETCH_STEP):
        """Sync notes carrying ``self.tag_id`` into ``Article`` rows.

        Fetches note metadata starting at ``offset`` (at most ``step``
        notes) and stores the response in ``self.result_list``. For each
        note, a summary is built from the note's search text (tag names
        removed, newlines dropped, truncated with a length of 240) and the
        sanitized note HTML is used as the body. The matching ``Article``
        is updated, or a new one is created and merged into the session.
        Recurses for the remaining notes when the result was partial.
        """
        token = os.environ.get("EN_DEV_TOKEN")

        note_filter = NoteFilter(order=NoteSortOrder.CREATED, tagGuids=[self.tag_id])
        result_spec = NotesMetadataResultSpec(
            includeTitle=True, includeAttributes=True, includeCreated=True, includeTagGuids=True
        )

        self.result_list = self.note_store.findNotesMetadata(
            token, note_filter, offset, step, result_spec
        )
        for note in self.result_list.notes:
            summary = self.note_store.getNoteSearchText(token, note.guid, True, False)
            for tag_name in self.note_store.getNoteTagNames(token, note.guid):
                summary = summary.replace(tag_name, "")
            summary = unicode(summary, "utf8").replace("\n", "").strip()
            summary = truncate(summary, 240, whole_word=True)

            body = sanitize_html(self._get_note(note.guid)[1])

            article = Article.find_by_guid(note.guid)
            is_new = not article
            if is_new:
                article = Article()
            article.guid = note.guid
            article.title = unicode(note.title, "utf-8")
            article.slug = urlify(article.title.replace("/", ""))
            article.created = epoch_date(note.created)
            article.source = note.attributes.sourceURL
            article.short = summary
            article.body = body

            print(article)
            # Only articles created here are merged into the session.
            if is_new:
                db.session.merge(article)
            db.session.commit()
            print("+++")

        offset += len(self.result_list.notes)
        remaining = self.result_list.totalNotes - offset
        if remaining > 0:
            self.sync_list(offset=offset, step=remaining)
Ejemplo n.º 31
0
def handle_update(db, id, tikapath, version):
    """Extract metadata and text from a stored document and re-index it.

    The document's ``raw_data`` is written to a temporary file, which is
    passed to ``java -jar <tikapath>``; the process output is parsed as XML.
    Content type, creation date, body HTML, text and a description (text
    truncated with a length of 100) are stored on the document when
    present, then parsing is marked finished for ``version`` and the
    document is reindexed.

    :param db: object whose ``find_one(id)`` returns the document.
    :param id: identifier passed to ``db.find_one``.
    :param tikapath: path of the jar passed to ``java -jar``.
    :param version: value passed to ``doc.finish_parsing``.
    """
    doc = db.find_one(id)
    data = doc.raw_data
    with NamedTemporaryFile() as tmpfile:
        tmpfile.write(data)
        tmpfile.seek(0)
        command = ['/usr/bin/java', '-jar', tikapath, tmpfile.name]
        process = subprocess.Popen(command, stdout=subprocess.PIPE)
        analysis = process.communicate()[0]
        tree = etree.fromstring(analysis)
        namespaces = dict(html='http://www.w3.org/1999/xhtml')

        def xp(term):
            # Evaluate an XPath expression with the XHTML namespace bound.
            return tree.xpath(term, namespaces=namespaces)

        content_type = xp('//html:meta[@name="Content-Type"]/@content')
        date = xp('//html:meta[@name="Creation-Date"]/@content')
        if date:
            date = convertStringToDateTime(date[0])
        content = xp('//html:body/*')
        if content:
            content = ''.join([etree.tostring(node) for node in content])
        text = ' '.join(xp('//*/text()'))
        text = texthelpers.replace_whitespace(text.replace('\n', ' ')).strip()
        description = texthelpers.truncate(text, 100, '', whole_word=True)

        if content_type:
            doc.update_plugin_and_canonical_attr('content_type',
                                                 content_type[0])
        if date:
            doc.update_plugin_and_canonical_attr('created', date)
        if content:
            doc.update_plugin_attr('full_html', content)
            doc.register_html_representation('full_html')
        if text:
            doc.update_plugin('text', text)
            doc.register_searchable_field("text")
        if description:
            doc.update_plugin_and_canonical_attr('description', description)
        doc.finish_parsing(version)
        doc.reindex()
Ejemplo n.º 32
0
def handle_update(
    db,
    id,
    tikapath,
    version,
):
    """Analyse a stored document with an external jar and update its fields.

    ``doc.raw_data`` is dumped into a temporary file and
    ``/usr/bin/java -jar tikapath <file>`` is run on it. The XML written to
    stdout is queried for the content type, creation date, body elements
    and all text nodes; each non-empty value is written back to the
    document before ``finish_parsing(version)`` and ``reindex()`` are
    called.
    """
    doc = db.find_one(id)
    raw = doc.raw_data
    with NamedTemporaryFile() as tmpfile:
        tmpfile.write(raw)
        tmpfile.seek(0)
        cmd = subprocess.Popen(
            ['/usr/bin/java', '-jar', tikapath, tmpfile.name],
            stdout=subprocess.PIPE)
        stdout_data = cmd.communicate()[0]
        tree = etree.fromstring(stdout_data)
        namespaces = dict(html='http://www.w3.org/1999/xhtml')

        def query(expression):
            # XPath lookup with the "html" prefix bound to XHTML.
            return tree.xpath(expression, namespaces=namespaces)

        content_type = query('//html:meta[@name="Content-Type"]/@content')
        date = query('//html:meta[@name="Creation-Date"]/@content')
        if date:
            date = convertStringToDateTime(date[0])
        content = query('//html:body/*')
        if content:
            serialized = [etree.tostring(x) for x in content]
            content = ''.join(serialized)
        joined = ' '.join(query('//*/text()'))
        flattened = joined.replace('\n', ' ')
        text = texthelpers.replace_whitespace(flattened).strip()
        description = texthelpers.truncate(text, 100, '', whole_word=True)

        if content_type:
            first_type = content_type[0]
            doc.update_plugin_and_canonical_attr('content_type', first_type)
        if date:
            doc.update_plugin_and_canonical_attr('created', date)
        if content:
            doc.update_plugin_attr('full_html', content)
            doc.register_html_representation('full_html')
        if text:
            doc.update_plugin('text', text)
            doc.register_searchable_field("text")
        if description:
            doc.update_plugin_and_canonical_attr('description', description)
        doc.finish_parsing(version)
        doc.reindex()
Ejemplo n.º 33
0
def twitter_sink(pipeline):
    """Send qualifying notifications as Twitter DMs; pass the rest through.

    A notification is sent when its user has a ``twitter`` attribute set
    and the notification priority is at least ``user.twitter.priority``.
    The message is the truncated subject followed by a shortened link.
    Every other notification is yielded unchanged.
    """
    for notification in pipeline:
        user = notification.user
        wants_dm = user.twitter and (
            notification.priority >= user.twitter.priority)
        if not wants_dm:
            yield notification
            continue

        notification.language_context()
        short_url = microblog.shorten_url(notification.link)
        # Reserve room for the separating space, the URL and the indicator.
        reserved = 1 + len(short_url) + len(TRUNCATE_EXT)
        remaining_length = TWITTER_LENGTH - reserved
        tweet = text.truncate(notification.subject, remaining_length,
                              TRUNCATE_EXT, False)
        tweet += ' ' + short_url

        log.debug("twitter DM to %s: %s" %
                  (user.twitter.screen_name, tweet))
        api = microblog.create_default()
        api.PostDirectMessage(user.twitter.screen_name, tweet)
Ejemplo n.º 34
0
def twitter_sink(pipeline):
    """Pipeline stage that delivers notifications by Twitter direct message.

    Notifications whose user has Twitter configured, and whose priority
    reaches that user's Twitter priority, are posted as a direct message
    (truncated subject plus shortened link) and consumed. All other
    notifications are yielded on to the next stage.
    """
    for notification in pipeline:
        recipient = notification.user
        account = recipient.twitter
        if account and (notification.priority >= recipient.twitter.priority):
            notification.language_context()
            short_url = microblog.shorten_url(notification.link)
            overhead = 1 + len(short_url) + len(TRUNCATE_EXT)
            subject = text.truncate(notification.subject,
                                    TWITTER_LENGTH - overhead,
                                    TRUNCATE_EXT, False)
            tweet = subject + ' ' + short_url

            screen_name = recipient.twitter.screen_name
            log.debug("twitter DM to %s: %s" % (screen_name, tweet))
            api = microblog.create_default()
            api.PostDirectMessage(recipient.twitter.screen_name, tweet)
        else:
            yield notification
Ejemplo n.º 35
0
def link(title, href):
    """Return an HTML anchor for ``href`` labelled with a shortened title.

    The title is truncated (length 40, whole words) and then passed through
    ``cgi.escape``.
    """
    # NOTE(review): href is interpolated without escaping -- confirm callers
    # only pass trusted, already-safe URLs.
    label = cgi.escape(truncate(title, length=40, whole_word=True))
    return u"<a href='%s'>%s</a>" % (href, label)
Ejemplo n.º 36
0
 def render_readonly(self, **kwargs):
     """Render the read-only value with tags stripped and truncated to 30.

     Delegates to the parent renderer; a falsy result becomes ''.
     """
     rendered = super(EllipsysFieldRenderer, self).render_readonly(**kwargs)
     if not rendered:
         return ''
     return text.truncate(strip_tags(rendered), 30)
Ejemplo n.º 37
0
 def _format(_html):
     """Strip formatting, truncate (length 200, whole words) and apply
     ``markdown_filter`` to the result."""
     plain = remove_formatting(_html)
     shortened = truncate(plain, length=200, whole_word=True)
     return markdown_filter(shortened)
Ejemplo n.º 38
0
Archivo: helpers.py Proyecto: HHS/ckan
def markdown_extract(text, extract_length=190):
    """Return a truncated, tag-free extract of markdown text as a literal.

    The markdown is rendered, anything matching ``<.*?>`` is removed, and
    the remainder is truncated to ``extract_length`` on a word boundary.
    None or whitespace-only input yields ''.
    """
    if text is None or not text.strip():
        return ''
    plain = re.sub(r'<.*?>', '', markdown(text))
    shortened = truncate(plain, length=extract_length, indicator='...', whole_word=True)
    return literal(unicode(shortened))
Ejemplo n.º 39
0
 def _format(_html):
     """Return ``_html`` with formatting removed, truncated to a length of
     200 on a word boundary."""
     plain = remove_formatting(_html)
     return truncate(plain, length=200, whole_word=True)
Ejemplo n.º 40
0
def markdown_extract(text, extract_length=190):
    """Render markdown, drop its tags and return a truncated literal.

    :param text: markdown source; None or whitespace-only yields "".
    :param extract_length: length passed to ``truncate`` (whole words,
        "..." indicator).
    """
    is_blank = (text is None) or (text.strip() == "")
    if is_blank:
        return ""
    rendered = markdown(text)
    plain = re.sub(r"<.*?>", "", rendered)
    extract = truncate(plain, length=extract_length, indicator="...", whole_word=True)
    return literal(unicode(extract))
Ejemplo n.º 41
0
 def brief(self):
     """Return the tag-stripped docs truncated to 300 characters.

     When the stripped text is 300 characters or shorter, '' is returned.
     """
     plain = strip_tags(self.wikitext_docs)
     if len(plain) <= 300:
         # NOTE(review): short docs yield '' rather than the full text --
         # confirm this is intended.
         return ''
     return truncate(plain, 300)
Ejemplo n.º 42
0
from ckanext.blog.authorize import blog_admin

log = getLogger(__name__)

def latest_post():
    """Return the most recently created visible blog post, or ``None``.

    The returned post is decorated with two extra attributes:

      - ``content_markdown``: the post content truncated to 320 on a word
        boundary (``'...'`` indicator) and rendered through ``markdown``
      - ``post_author``: the user looked up from ``post.user_id``

    Returns:
        The decorated ``Post`` instance, or ``None`` when no visible post
        exists.
    """
    from ckanext.blog.model.post import Post

    try:
        post = Session.query(Post).\
            filter(Post.visible == True).\
            order_by('created desc').\
            first()
    except NoResultFound:
        return None

    # Query.first() reports "no rows" by returning None instead of raising
    # NoResultFound, so the empty case must be handled explicitly; otherwise
    # the attribute assignments below fail with AttributeError.
    if post is None:
        return None

    excerpt = truncate(post.content, length=320, indicator='...', whole_word=True)
    post.content_markdown = markdown(unicode(excerpt))
    # Fall back to a lookup by primary key when model.User.get finds nothing.
    post.post_author = model.User.get(post.user_id) or Session.query(model.User).filter_by(id=post.user_id).first()
    return post

class BlogPlugin(p.SingletonPlugin):
    """CKAN extension that adds blogging functionality.

    This extension implements four interfaces:

      - ``IConfigurer`` to modify the configuration
      - ``IConfigurable`` to receive the configuration
      - ``IAuthFunctions`` to add custom authorization
      - ``IRoutes`` to add custom routes
    """
    # NOTE(review): only the two declarations below are visible in this
    # excerpt; confirm the remaining interfaces against the full class.
    p.implements(p.IConfigurer, inherit=True)
    p.implements(p.IConfigurable, inherit=True)
Ejemplo n.º 43
0
def remake_blog():
    """Regenerate the static blog from markdown posts and configured pages.

    Steps, in order:

      1. Parse command-line options with docopt (``--config`` defaults to
         ``blog.yml``) and load that YAML file.
      2. Configure logging from ``config["logging"]`` and rebind the
         module-level ``log``.
      3. Convert every ``posts/*.md`` file to HTML, keeping only posts whose
         ``published`` metadata evaluates truthy, sorted newest first.
      4. Render one page per post, one page per archive entry, and one page
         per entry in ``config["pages"]``.
    """
    global log
    args = docopt(
        """Usage:
        remake-blog [options]

    Options:
        -h --help                   show this help message and exit
        -c --config=CONFIG          use the given config file [default: blog.yml]
    """
    )
    with open(args["--config"]) as fp:
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary Python objects and is rejected by newer PyYAML releases;
        # consider yaml.safe_load if the config needs no custom tags.
        config = yaml.load(fp)
    logging.config.dictConfig(config["logging"])
    log = logging.getLogger(__name__)
    log.info("Logging configured")

    # First, gather the posts
    posts = []
    for fn_post in glob.glob("posts/*.md"):
        # Slug is the file name without its final extension.
        slug = os.path.basename(fn_post).rsplit(".", 1)[0]
        # Separate converter instances for the full text and the preview;
        # presumably so the preview conversion does not disturb md.Meta.
        md = markdown.Markdown(extensions=extensions)
        md_pre = markdown.Markdown(extensions=extensions)
        with open(fn_post) as fp:
            md_content = fp.read()
            html_content = md.convert(md_content)
            # Preview is rendered from the raw markdown truncated to 300 on a
            # word boundary.
            html_preview = md_pre.convert(text.truncate(md_content, 300, whole_word=True))
        post_data = dict(
            fn=fn_post,
            slug=slug,
            path="posts/{}.html".format(slug),
            title=md.Meta["title"][0],
            md_content=md_content,
            html_content=html_content,
            html_preview=html_preview,
            meta=md.Meta,
            date=datetime.strptime(md.Meta["date"][0], "%Y-%m-%d"),
        )
        # NOTE(review): eval() executes whatever expression the post's
        # "published" metadata contains; only safe for trusted post files.
        if eval(md.Meta["published"][0]):
            posts.append(Object.ify(post_data))

    # Newest posts first.
    posts.sort(key=lambda p: p.date, reverse=True)
    archive = make_archive(posts)

    # Generate post html
    for post in posts:
        render_page(
            config,
            fn_out="public/posts/{}.html".format(post.slug),
            fn_template="post.html",
            page_id="blog",
            archive=archive,
            **post
        )

    # Generate archive pages
    for month_data in archive:
        # No fn_out is passed here; presumably each archive entry supplies
        # it through **month_data -- verify against make_archive.
        render_page(config, fn_template="archive.html", page_id="blog", archive=archive, **month_data)

    # Generate other pages
    for slug, pg in config["pages"].items():
        if "content" in pg:
            with open(pg["content"]) as fp:
                md_content = fp.read()
                html_content = markdown.markdown(md_content)
        else:
            # Pages without a content file render with an empty body.
            html_content = ""
        render_page(
            config,
            fn_out="public/{}.html".format(slug),
            fn_template=pg.get("template", "page.html"),
            html_content=html_content,
            title=pg["title"],
            page=pg,
            page_id=slug,
            posts=posts,
            archive=archive,
        )
Ejemplo n.º 44
0
from webhelpers.html.tools import mail_to
from webhelpers.text import truncate
from webhelpers.date import distance_of_time_in_words

import re

from pygments import highlight as _highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters.html import HtmlFormatter
from difflib import unified_diff

#log = logging.getLogger(__name__)

#----------------------------------------------------------------------

def cut(text, max=200):
    """Truncate ``text`` with a length of ``max``, keeping whole words."""
    return truncate(text, max, whole_word=True)


def link(label, url='', **attrs):
    """Return ``link_to`` output for ``label``, targeting ``tgurl(url)``.

    Extra keyword arguments are forwarded to ``link_to`` unchanged.
    """
    target = tgurl(url)
    return link_to(label, target, **attrs)


def striphtml(text):
    """Replace each ``<...>`` tag in ``text`` with a space and strip the ends.

    Falsy input (``None``, empty string) yields an empty unicode string.
    """
    if not text:
        return u''
    without_tags = re.sub('<[^<]+?>', ' ', text)
    return without_tags.strip()


def icon(icon_name, white=False):
    """Return an ``<i>`` icon element as an HTML literal.

    The element carries class ``icon-<icon_name>``; a truthy ``white`` adds
    the ``icon-white`` class as well.
    """
    template = ('<i class="icon-%s icon-white"></i>' if white
                else '<i class="icon-%s"></i>')
    return html.literal(template % icon_name)
Ejemplo n.º 45
0
def markdown_extract(text):
    """Return a plain-text extract of markdown ``text`` truncated to 190.

    ``None`` and the empty string yield ``''``. Otherwise the rendered
    markdown is parsed with ``fromstring``, its string value is taken via
    XPath ``string()``, and the result is truncated on a word boundary with
    a ``'...'`` indicator.
    """
    if text is None or text == '':
        return ''
    tree = fromstring(markdown(text))
    plain = tree.xpath("string()")
    shortened = truncate(plain, length=190, indicator='...', whole_word=True)
    return unicode(shortened)
Ejemplo n.º 46
0
    def excerpt(self):
        """Return the body with tags stripped, truncated to 300 on a word
        boundary."""
        plain_body = strip_tags(literal(self.body))
        return truncate(plain_body, length=300, whole_word=True)
Ejemplo n.º 47
0

def latest_post():
    """Return the most recently created visible blog post, or ``None``.

    The returned post is decorated with two extra attributes:

      - ``content_markdown``: the post content truncated to 320 on a word
        boundary (``'...'`` indicator) and rendered through ``markdown``
      - ``post_author``: the user looked up from ``post.user_id``

    Returns:
        The decorated ``Post`` instance, or ``None`` when no visible post
        exists.
    """
    from ckanext.blog.model.post import Post

    try:
        post = Session.query(Post).\
            filter(Post.visible == True).\
            order_by('created desc').\
            first()
    except NoResultFound:
        return None

    # Query.first() reports "no rows" by returning None instead of raising
    # NoResultFound, so the empty case must be handled explicitly; otherwise
    # the attribute assignments below fail with AttributeError.
    if post is None:
        return None

    excerpt = truncate(post.content,
                       length=320,
                       indicator='...',
                       whole_word=True)
    post.content_markdown = markdown(unicode(excerpt))
    # Fall back to a lookup by primary key when model.User.get finds nothing.
    post.post_author = model.User.get(post.user_id) or Session.query(
        model.User).filter_by(id=post.user_id).first()
    return post


class BlogPlugin(p.SingletonPlugin):
    """This extension adds blogging functionality to ckan

    This extension implements four interfaces

      - ``IConfigurer`` allows to modify the configuration
      - ``IConfigurable`` get the configuration
      - ``IAuthFunctions`` to add custom authorization
      - ``IRoutes`` to add custom routes
Ejemplo n.º 48
0
def markdown_extract(text, extract_length=190):
    """Return a plain-text extract of markdown ``text``.

    ``None`` and the empty string yield ``""``. Otherwise the rendered
    markdown is parsed with ``fromstring``, reduced to its XPath
    ``string()`` value, and truncated to ``extract_length`` on a word
    boundary with a ``"..."`` indicator.
    """
    if text is None or text == "":
        return ""
    document = fromstring(markdown(text))
    plain_text = document.xpath("string()")
    shortened = truncate(plain_text, length=extract_length, indicator="...",
                         whole_word=True)
    return unicode(shortened)
Ejemplo n.º 49
0
from webhelpers.date import distance_of_time_in_words

import re

from pygments import highlight as _highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters.html import HtmlFormatter
from difflib import unified_diff

# log = logging.getLogger(__name__)


# ----------------------------------------------------------------------


def cut(text, max=200):
    """Truncate ``text`` with a length of ``max``, keeping whole words."""
    return truncate(text, max, whole_word=True)


def link(label, url="", **attrs):
    """Return ``link_to`` output for ``label``, targeting ``tgurl(url)``.

    Extra keyword arguments are forwarded to ``link_to`` unchanged.
    """
    destination = tgurl(url)
    return link_to(label, destination, **attrs)


def striphtml(text):
    """Replace each ``<...>`` tag in ``text`` with a space and strip the ends.

    Falsy input (``None``, empty string) yields an empty unicode string.
    """
    if not text:
        return u""
    tagless = re.sub("<[^<]+?>", " ", text)
    return tagless.strip()


def icon(icon_name, white=False):
    """Return an ``<i>`` icon element as an HTML literal.

    The element carries class ``icon-<icon_name>``; a truthy ``white`` adds
    the ``icon-white`` class as well.
    """
    markup = ('<i class="icon-%s icon-white"></i>' if white
              else '<i class="icon-%s"></i>')
    return html.literal(markup % icon_name)
Ejemplo n.º 50
0
 def test_truncate(self):
     """Check truncate() for text that fits, text that is cut mid-word,
     and text that is cut on a word boundary."""
     fits = truncate("Hello World!", 12)
     self.assertEqual("Hello World!", fits)

     cut_mid_word = truncate("Hello World!!", 12)
     self.assertEqual("Hello Wor...", cut_mid_word)

     cut_whole_word = truncate("Hello World!!", 12, whole_word=True)
     self.assertEqual("Hello...", cut_whole_word)
Ejemplo n.º 51
0
def link(title, href):
    """Return an HTML anchor pointing at ``href`` with a shortened title.

    The title is truncated (``length=40``, whole words only) and then
    passed through ``cgi.escape``; ``href`` is interpolated as given.
    """
    short_title = truncate(title, length=40, whole_word=True)
    escaped_title = cgi.escape(short_title)
    return u"<a href='%s'>%s</a>" % (href, escaped_title)
Ejemplo n.º 52
0
 def render_readonly(self, **kwargs):
     """Render the read-only value as tag-free, truncated text.

     Delegates to the parent renderer; a falsy result yields ``''``,
     otherwise tags are stripped and the text is truncated with a
     length of 30.
     """
     parent_output = super(EllipsysFieldRenderer, self).render_readonly(**kwargs)
     if not parent_output:
         return ''
     return text.truncate(strip_tags(parent_output), 30)