def scrape_data(long_url):
    """
    Fetch ``long_url`` and extract its title and Open Graph description.

    The request is sent with a browser-like ``User-Agent`` header. The
    description comes from the ``content`` attribute of the first
    ``<meta property="og:description">`` tag and is truncated to 200
    characters.

    Returns a dict with the keys ``"title"`` and ``"description"``, both
    encoded as UTF-8. A page without a usable ``<title>`` yields an empty
    title, and a page without an ``og:description`` tag (or without its
    ``content`` attribute) yields ``"No description"``.

    Errors raised while opening the URL are not handled here and
    propagate to the caller.
    """
    hdr = {"User-Agent": "Mozilla/5.0"}
    req = urllib2.Request(long_url, headers=hdr)
    response = urllib2.urlopen(req)
    try:
        soup = BeautifulSoup(response, "html.parser")
    finally:
        # Parsing has consumed the body; do not leak the connection.
        response.close()

    # soup.title is None when the page has no <title>; its .string can
    # also be None, so both cases fall back to an empty title.
    title = soup.title.string if soup.title is not None else None
    if title is None:
        title = ""

    meta_tags = soup.findAll("meta", {"property": "og:description"})
    if meta_tags:
        og_desc = meta_tags[0].get("content", "No description")
    else:
        # No og:description tag at all: same default as a tag that
        # lacks its content attribute.
        og_desc = "No description"
    description = Truncator(og_desc).chars(200)

    return {
        "title": title.encode("utf-8"),
        "description": description.encode("utf-8"),
    }
def scrape_data(long_url):
    """
    Download ``long_url`` and return its title and og:description.

    A browser-like ``User-Agent`` header is sent with the request. The
    description is read from the ``content`` attribute of the first
    ``<meta property="og:description">`` tag and cut to 200 characters.

    Returns a dict with ``"title"`` and ``"description"``, both UTF-8
    encoded. When the page has no usable ``<title>`` the title is empty;
    when there is no ``og:description`` tag (or it has no ``content``
    attribute) the description is ``"No description"``.

    Errors raised while opening the URL propagate to the caller.
    """
    hdr = {'User-Agent': 'Mozilla/5.0'}
    req = urllib2.Request(long_url, headers=hdr)
    response = urllib2.urlopen(req)
    try:
        soup = BeautifulSoup(response, 'html.parser')
    finally:
        # The document is parsed; release the connection in every case.
        response.close()

    # Guard both a missing <title> tag and a title whose .string is None.
    title = soup.title.string if soup.title is not None else None
    if title is None:
        title = ''

    meta_tags = soup.findAll('meta', {"property": 'og:description'})
    if meta_tags:
        og_desc = meta_tags[0].get('content', 'No description')
    else:
        # Indexing an empty result list would raise IndexError.
        og_desc = 'No description'
    description = Truncator(og_desc).chars(200)

    return {
        "title": title.encode("utf-8"),
        "description": description.encode("utf-8"),
    }
def get_item_notification(notification):
    """
    Build the HTML snippet describing one notification of one user.

    Only comment notifications are rendered. An empty string is returned
    when the notification is not a comment, or when the comment lookup
    raises ``Comment.DoesNotExist``.
    """
    idobject = notification.idobject
    if not notification.is_comment:
        return ""

    try:
        comment = Comment.objects.get(idcomment=idobject)

        # Pieces of the link to the topic the comment was posted in
        forum_name = comment.topic.forum.name
        topic_slug = comment.topic.slug
        topic_id = comment.topic.idtopic
        excerpt = Truncator(comment.description).chars(100)
        username = comment.user.username

        url_topic = (
            "/topic/" + forum_name + "/" + topic_slug + "/" +
            str(topic_id) + "/"
        )
        title = (
            "<h5><a href='" + url_topic + "'><u>" +
            comment.topic.title + "</u></h5></a>"
        )
        description = "<p>" + excerpt + "</p>"

        # Profile URL params are best effort: any failure means "none"
        try:
            params = get_params_url_profile(comment.user)
        except Exception:
            params = ""

        # Avatar: uploaded photo if there is one, default image otherwise
        photo = get_photo_profile(get_id_profile(comment.user.id))
        if photo:
            path_img = settings.MEDIA_URL + str(photo)
        else:
            path_img = static("img/profile.png")

        profile_base = URL_PROFILE
        # NOTE(review): the "******" prefix looks like a masked anchor
        # opening tag - confirm against the real template.
        if params:
            user = (
                "******" + profile_base + params +
                "'><p>" + username + "</p></a>"
            )
        else:
            user = "******" + username + "</a>"

        date = get_datetime_topic(notification.date)

        # Notification markup, emitted in document order
        fragments = (
            '<div class="list-group">',
            ' <div class="list-group-item">',
            ' <div class="row-action-primary">',
            ' <img src="' + path_img +
            '" width=30 height=30 class="img-circle" />',
            ' </div>',
            ' <div class="row-content">',
            ' <div class="least-content">' + date + '</div>',
            ' <h4 class="list-group-item-heading">' +
            title.encode('utf8') + '</h4>',
            ' <p class="list-group-item-text">' +
            description.encode('utf8') + '</p>',
            ' <p>' + user.encode('utf8') + '</p>',
            ' </div>',
            ' </div>',
            ' <div class="list-group-separator"></div>',
            '</div>',
        )
        return "".join(fragments)
    except Comment.DoesNotExist:
        return ""
def import_entry(self, title, content, item_node):
    """
    Import one WordPress item as an entry.

    Some data are missing from the import, like related entries,
    start_publication and end_publication. The creation date is parsed
    from the item's ``post_date_gmt`` element; when that element is
    missing, empty or malformed, the current time is used instead.

    The entry is looked up by slug and creation date and only created
    when no match exists. Categories, author and site are attached to
    newly created entries only.

    Returns the ``(entry, created)`` pair from ``get_or_create``.
    """
    def wp_text(name):
        # Text of the child element `name` in the WordPress namespace.
        return item_node.find('{%s}%s' % (WP_NS, name)).text

    try:
        creation_date = datetime.strptime(
            wp_text('post_date_gmt'), '%Y-%m-%d %H:%M:%S')
    except (AttributeError, TypeError, ValueError):
        # Missing element, empty text or unparsable date.
        creation_date = None

    if creation_date is None:
        # timezone.now() already matches settings.USE_TZ, so it must not
        # be re-labelled as GMT the way a parsed naive date is.
        creation_date = timezone.now()
    elif settings.USE_TZ:
        creation_date = timezone.make_aware(
            creation_date, pytz.timezone('GMT'))

    # NOTE(review): this tag only resolves if WP_NS ends with a path
    # separator - confirm against the WP_NS definition.
    excerpt_node = item_node.find('{%sexcerpt/}encoded' % WP_NS)
    raw_excerpt = excerpt_node.text if excerpt_node is not None else None
    if raw_excerpt:
        excerpt = strip_tags(raw_excerpt)
    elif self.auto_excerpt:
        excerpt = Truncator(strip_tags(content)).words(50)
    else:
        excerpt = ''

    # Prefer slugifying the title over reading the post_name element,
    # because the stored slug may not be well formatted. The post id is
    # only looked up when the title slugifies to nothing.
    slug = slugify(title)[:255] or 'post-%s' % wp_text('post_id')

    status = wp_text('status')
    categories = item_node.findall('category')

    entry_dict = {
        'title': title,
        'content': content.encode('ascii', 'xmlcharrefreplace'),
        'excerpt': excerpt.encode('ascii', 'xmlcharrefreplace'),
        'tags': ', '.join(self.get_entry_tags(categories))[:255],
        'status': self.REVERSE_STATUS[status],
        'comment_enabled': wp_text('comment_status') == 'open',
        'pingback_enabled': wp_text('ping_status') == 'open',
        'featured': wp_text('is_sticky') == '1',
        'password': wp_text('post_password') or '',
        'login_required': status == 'private',
        'last_update': timezone.now(),
    }
    entry_dict['trackback_enabled'] = entry_dict['pingback_enabled']

    entry, created = Entry.objects.get_or_create(
        slug=slug, creation_date=creation_date, defaults=entry_dict)
    if created:
        entry.categories.add(*self.get_entry_categories(categories))
        entry.authors.add(self.authors[item_node.find(
            '{http://purl.org/dc/elements/1.1/}creator').text])
        entry.sites.add(self.SITE)

    return entry, created
def get_item_notification(notification):
    """
    Build the HTML snippet describing one notification of one user.

    Only comment notifications are rendered. An empty string is returned
    when the notification is not a comment, or when the comment lookup
    raises ``Comment.DoesNotExist``.
    """
    idobject = notification.idobject
    if not notification.is_comment:
        return ""

    try:
        comment = Comment.objects.get(idcomment=idobject)

        # Pieces of the link to the topic the comment was posted in
        forum_name = comment.topic.forum.name
        topic_slug = comment.topic.slug
        topic_id = comment.topic.idtopic
        excerpt = Truncator(comment.description).chars(100)
        username = comment.user.username

        url_topic = (
            "/topic/" + forum_name + "/" + topic_slug + "/" +
            str(topic_id) + "/"
        )
        title = (
            "<h5><a href='" + url_topic + "'><u>" +
            comment.topic.title + "</u></h5></a>"
        )
        description = "<p>" + excerpt + "</p>"

        # Profile URL params are best effort: any failure means "none"
        try:
            params = get_params_url_profile(comment.user)
        except Exception:
            params = ""

        # Avatar: uploaded photo if there is one, default image otherwise
        photo = get_photo_profile(get_id_profile(comment.user.id))
        if photo:
            path_img = settings.MEDIA_URL + str(photo)
        else:
            path_img = static("img/profile.png")

        profile_base = URL_PROFILE
        # NOTE(review): the "******" prefix looks like a masked anchor
        # opening tag - confirm against the real template.
        if params:
            user = (
                "******" + profile_base + params +
                "'><p>" + username + "</p></a>"
            )
        else:
            user = "******" + username + "</a>"

        date = get_datetime_topic(notification.date)

        # Notification markup, emitted in document order
        fragments = (
            '<div class="list-group">',
            ' <div class="list-group-item">',
            ' <div class="row-action-primary">',
            ' <img src="' + path_img +
            '" width=30 height=30 class="img-circle" />',
            ' </div>',
            ' <div class="row-content">',
            ' <div class="least-content">' + date + '</div>',
            ' <h4 class="list-group-item-heading">' +
            title.encode('utf8') + '</h4>',
            ' <p class="list-group-item-text">' +
            description.encode('utf8') + '</p>',
            ' <p>' + user.encode('utf8') + '</p>',
            ' </div>',
            ' </div>',
            ' <div class="list-group-separator"></div>',
            '</div>',
        )
        return "".join(fragments)
    except Comment.DoesNotExist:
        return ""