Exemple #1
0
def scrape_data(long_url):
    """Fetch ``long_url`` and extract its title and Open Graph description.

    The page is requested with a browser-like User-Agent header and parsed
    with BeautifulSoup's ``html.parser`` backend.

    Args:
        long_url: URL of the page to scrape.

    Returns:
        A dict with two UTF-8 encoded values:
        ``"title"`` - the text of the ``<title>`` tag, or an empty string
        when the page has no usable title;
        ``"description"`` - the ``og:description`` meta content truncated
        to 200 characters, or ``"No description"`` when the tag or its
        ``content`` attribute is missing.

    Raises:
        Whatever ``urllib2.urlopen`` raises for unreachable or failing URLs;
        network errors are deliberately not swallowed here.
    """
    hdr = {"User-Agent": "Mozilla/5.0"}
    req = urllib2.Request(long_url, headers=hdr)
    response = urllib2.urlopen(req)
    try:
        # The document is parsed while the response is open, so the
        # connection can be released as soon as the soup is built.
        soup = BeautifulSoup(response, "html.parser")
    finally:
        response.close()

    # Both the <title> tag and its .string can be None (missing tag, or a
    # title containing nested markup); fall back to an empty title instead
    # of crashing on None.
    title_tag = soup.title
    title = title_tag.string if title_tag is not None else None
    if title is None:
        title = u""

    # find() returns None when the tag is absent, which lets the
    # "No description" default apply to pages without og:description too.
    meta_tag = soup.find("meta", {"property": "og:description"})
    if meta_tag is not None:
        og_desc = meta_tag.get("content", "No description")
    else:
        og_desc = "No description"
    description = Truncator(og_desc).chars(200)

    return {"title": title.encode("utf-8"), "description": description.encode("utf-8")}
Exemple #2
0
def scrape_data(long_url):
    """Fetch ``long_url`` and extract its title and Open Graph description.

    The page is requested with a browser-like User-Agent header and parsed
    with BeautifulSoup's ``html.parser`` backend.

    Args:
        long_url: URL of the page to scrape.

    Returns:
        A dict with two UTF-8 encoded values:
        ``"title"`` - the text of the ``<title>`` tag, or an empty string
        when the page has no usable title;
        ``"description"`` - the ``og:description`` meta content truncated
        to 200 characters, or ``'No description'`` when the tag or its
        ``content`` attribute is missing.

    Raises:
        Whatever ``urllib2.urlopen`` raises for unreachable or failing URLs;
        network errors are deliberately not swallowed here.
    """
    hdr = {'User-Agent': 'Mozilla/5.0'}
    req = urllib2.Request(long_url, headers=hdr)
    response = urllib2.urlopen(req)
    try:
        # Parsing consumes the response, so it can be closed right after.
        soup = BeautifulSoup(response, 'html.parser')
    finally:
        response.close()

    # A page may lack <title>, or the tag may have no single string child;
    # use an empty title rather than failing on None.
    title = soup.title.string if soup.title is not None else None
    if title is None:
        title = u''

    # Pages without an og:description tag get the same default as tags
    # that lack a content attribute.
    meta_tag = soup.find('meta', {"property": 'og:description'})
    if meta_tag is None:
        og_desc = 'No description'
    else:
        og_desc = meta_tag.get('content', 'No description')
    description = Truncator(og_desc).chars(200)

    return {
        "title": title.encode("utf-8"),
        "description": description.encode("utf-8"),
    }
Exemple #3
0
def get_item_notification(notification):
    '''
        Build the HTML snippet for one notification of one user.

        Only comment notifications are rendered. The snippet contains the
        commenter's photo (or a default image), the notification date, a
        link to the topic, the comment text truncated to 100 characters
        and the commenter's username.

        Values read from the database (topic title, topic URL parts,
        comment text, username) are HTML-escaped before interpolation,
        because the markup is assembled by plain string concatenation.

        Returns an empty string when the notification is not about a
        comment or when the referenced comment does not exist.
    '''
    # Imported locally so the function does not rely on a module-level
    # import that may not be present in this file.
    from django.utils.html import escape

    if not notification.is_comment:
        return ""

    # Only the lookup itself can raise Comment.DoesNotExist, so the try
    # body is limited to it.
    try:
        comment = Comment.objects.get(idcomment=notification.idobject)
    except Comment.DoesNotExist:
        return ""

    topic = comment.topic
    username = escape(comment.user.username)

    # Truncate first, then escape, so an entity is never cut in half.
    description = escape(Truncator(comment.description).chars(100))
    description = "<p>" + description + "</p>"

    # The URL lands inside a single-quoted attribute; escaping keeps a
    # quote in the forum name or slug from breaking out of it.
    url_topic = escape(
        "/topic/" + topic.forum.name + "/" +
        topic.slug + "/" + str(topic.idtopic) + "/")

    # Closing tags are emitted in reverse order of the opening ones.
    title = "<h5><a href='" + url_topic + "'><u>" + \
        escape(topic.title) + "</u></a></h5>"

    # Profile URL parameters are best-effort: any failure simply yields a
    # user label without profile parameters.
    try:
        params = get_params_url_profile(comment.user)
    except Exception:
        params = ""

    # Data profile
    profile = get_id_profile(comment.user.id)
    photo = get_photo_profile(profile)
    if photo:
        path_img = settings.MEDIA_URL + str(photo)
    else:
        path_img = static("img/profile.png")

    # NOTE(review): the "******" literals look like a redacted opening
    # anchor tag (the strings around them close an attribute and an <a>).
    # They are kept byte-for-byte; confirm the intended markup.
    if params:
        user = "******" + URL_PROFILE + params + \
            "'><p>" + username + "</p></a>"
    else:
        user = "******" + username + "</a>"

    date = get_datetime_topic(notification.date)

    # Notification markup, joined once instead of repeated +=.
    parts = [
        '<div class="list-group">',
        '   <div class="list-group-item">',
        '      <div class="row-action-primary">',
        '           <img src="' + path_img +
        '" width=30 height=30 class="img-circle" />',
        '       </div>',
        '       <div class="row-content">',
        '           <div class="least-content">' + date + '</div>',
        '           <h4 class="list-group-item-heading">' +
        title.encode('utf8') + '</h4>',
        '           <p class="list-group-item-text">' +
        description.encode('utf8') + '</p>',
        '           <p>' + user.encode('utf8') + '</p>',
        '        </div>',
        '   </div>',
        '   <div class="list-group-separator"></div>',
        '</div>',
    ]
    return "".join(parts)
Exemple #4
0
    def import_entry(self, title, content, item_node):
        """
        Import one WordPress item as an Entry.

        Some data are not imported: related entries, start_publication
        and end_publication. The creation date is read from the item's
        ``post_date_gmt`` element; when that element is missing, empty or
        malformed, the current time is used instead.

        The excerpt is taken from the item's excerpt element with tags
        stripped; when it is absent or empty, the first 50 words of the
        stripped content are used if ``self.auto_excerpt`` is set,
        otherwise the excerpt is left empty.

        :param title: title of the entry, also used to build its slug.
        :param content: body of the entry.
        :param item_node: XML element of the WordPress item.
        :returns: an ``(entry, created)`` tuple as returned by
                  ``Entry.objects.get_or_create``.
        """
        def wp_text(tag):
            # Text of the child element ``tag`` in the WordPress namespace.
            return item_node.find('{%s}%s' % (WP_NS, tag)).text

        try:
            creation_date = datetime.strptime(
                wp_text('post_date_gmt'), '%Y-%m-%d %H:%M:%S')
        except (AttributeError, TypeError, ValueError):
            # Missing element, empty text or unparsable date.
            # NOTE(review): assumes ``timezone`` is django.utils.timezone,
            # whose now() is aware or naive according to USE_TZ. The
            # previous naive local datetime.now() was wrongly labelled
            # as GMT below when USE_TZ was enabled.
            creation_date = timezone.now()
        else:
            if settings.USE_TZ:
                creation_date = timezone.make_aware(
                    creation_date, pytz.timezone('GMT'))

        # The excerpt element may be absent from the item altogether.
        excerpt_node = item_node.find('{%sexcerpt/}encoded' % WP_NS)
        excerpt = excerpt_node.text if excerpt_node is not None else None
        if excerpt:
            excerpt = strip_tags(excerpt)
        elif self.auto_excerpt:
            excerpt = Truncator(strip_tags(content)).words(50)
        else:
            excerpt = ''

        # Prefer slugifying the title over reading the post_name element,
        # because the stored slug may not be well formatted.
        slug = slugify(title)[:255] or 'post-%s' % wp_text('post_id')

        # Looked up once; used for both the status and login_required.
        status = wp_text('status')

        entry_dict = {
            'title': title,
            'content': content.encode('ascii', 'xmlcharrefreplace'),
            'excerpt': excerpt.encode('ascii', 'xmlcharrefreplace'),
            'tags': ', '.join(self.get_entry_tags(item_node.findall(
                'category')))[:255],
            'status': self.REVERSE_STATUS[status],
            'comment_enabled': wp_text('comment_status') == 'open',
            'pingback_enabled': wp_text('ping_status') == 'open',
            'featured': wp_text('is_sticky') == '1',
            'password': wp_text('post_password') or '',
            'login_required': status == 'private',
            'last_update': timezone.now()}
        entry_dict['trackback_enabled'] = entry_dict['pingback_enabled']

        entry, created = Entry.objects.get_or_create(
            slug=slug, creation_date=creation_date,
            defaults=entry_dict)
        if created:
            # Relations are only attached to newly created entries.
            entry.categories.add(*self.get_entry_categories(
                item_node.findall('category')))
            entry.authors.add(self.authors[item_node.find(
                '{http://purl.org/dc/elements/1.1/}creator').text])
            entry.sites.add(self.SITE)

        return entry, created
Exemple #5
0
def get_item_notification(notification):
    '''
        Build the HTML snippet for one notification of one user.

        Only comment notifications are rendered. The snippet contains the
        commenter's photo (or a default image), the notification date, a
        link to the topic, the comment text truncated to 100 characters
        and the commenter's username.

        Values read from the database (topic title, topic URL parts,
        comment text, username) are HTML-escaped before interpolation,
        because the markup is assembled by plain string concatenation.

        Returns an empty string when the notification is not about a
        comment or when the referenced comment does not exist.
    '''
    # Imported locally so the function does not rely on a module-level
    # import that may not be present in this file.
    from django.utils.html import escape

    if not notification.is_comment:
        return ""

    # Only the lookup itself can raise Comment.DoesNotExist, so the try
    # body is limited to it.
    try:
        comment = Comment.objects.get(idcomment=notification.idobject)
    except Comment.DoesNotExist:
        return ""

    topic = comment.topic
    username = escape(comment.user.username)

    # Truncate first, then escape, so an entity is never cut in half.
    description = escape(Truncator(comment.description).chars(100))
    description = "<p>" + description + "</p>"

    # The URL lands inside a single-quoted attribute; escaping keeps a
    # quote in the forum name or slug from breaking out of it.
    url_topic = escape(
        "/topic/" + topic.forum.name + "/" +
        topic.slug + "/" + str(topic.idtopic) + "/")

    # Closing tags are emitted in reverse order of the opening ones.
    title = "<h5><a href='" + url_topic + "'><u>" + \
        escape(topic.title) + "</u></a></h5>"

    # Profile URL parameters are best-effort: any failure simply yields a
    # user label without profile parameters.
    try:
        params = get_params_url_profile(comment.user)
    except Exception:
        params = ""

    # Data profile
    profile = get_id_profile(comment.user.id)
    photo = get_photo_profile(profile)
    if photo:
        path_img = settings.MEDIA_URL + str(photo)
    else:
        path_img = static("img/profile.png")

    # NOTE(review): the "******" literals look like a redacted opening
    # anchor tag (the strings around them close an attribute and an <a>).
    # They are kept byte-for-byte; confirm the intended markup.
    if params:
        user = "******" + URL_PROFILE + params + \
            "'><p>" + username + "</p></a>"
    else:
        user = "******" + username + "</a>"

    date = get_datetime_topic(notification.date)

    # Notification markup, joined once instead of repeated +=.
    parts = [
        '<div class="list-group">',
        '   <div class="list-group-item">',
        '      <div class="row-action-primary">',
        '           <img src="' + path_img +
        '" width=30 height=30 class="img-circle" />',
        '       </div>',
        '       <div class="row-content">',
        '           <div class="least-content">' + date + '</div>',
        '           <h4 class="list-group-item-heading">' +
        title.encode('utf8') + '</h4>',
        '           <p class="list-group-item-text">' +
        description.encode('utf8') + '</p>',
        '           <p>' + user.encode('utf8') + '</p>',
        '        </div>',
        '   </div>',
        '   <div class="list-group-separator"></div>',
        '</div>',
    ]
    return "".join(parts)