Example 1
def format_content(value):
    value = value.replace('</p>', '<br>')
    value = bleach.clean(value, allowed_tags,
                         allowed_attributes,
                         allowed_styles,
                         strip=True)
    soup = BeautifulSoup(value, 'html.parser')

    # Constrain inline images to the 280px content column.
    for tag in soup.find_all('img'):
        if tag.has_attr('style'):
            width = re.findall(r'\d+', tag['style'])
            style = tag['style']
            if width:
                if int(width[0]) > 280 or style == u'width: 100%;':
                    tag['style'] = 'width:100%;vertical-align:middle'
                else:
                    tag['style'] = ('width:%spx;vertical-align:middle'
                                    % width[0])

    output = soup
    if soup.body:
        # Re-parent the <body> contents into a neutral <div> wrapper.
        body = get_content(soup)
        output = soup.new_tag("div")
        for content in body.contents[:]:
            output.append(content)

    return unicode(output)
Example 2
	def getPreview(self, obj):
		# Return the text wrapped in <preview ...>...</preview>, if present.
		pattern = re.compile(r'<preview .*?>(.*?)</preview>')
		matches = pattern.findall(obj.content)
		if matches:
			return matches[0]
		# No preview tag: fall back to a truncated excerpt.
		return '%s...' % remove_tags(obj.content[:150], 'preview')
Example 3
                def xt(cls, step_selector):
                    title_selector = step_selector.xpath('//td[2]')[0]

                    # we have wortmeldungen!
                    if title_selector.xpath('//table'):
                        table_selector = title_selector.xpath('//table')[0]
                        raw_rows = [
                            Selector(text=raw_row) for raw_row in
                            table_selector.xpath('//tbody//tr').extract()
                        ]
                        statements = []
                        # Extract statements data
                        for index, row_selector in enumerate(raw_rows):
                            try:
                                person_source_link = row_selector.xpath(
                                    cls.XP_P_LINK).extract()[0]
                                person_name = row_selector.xpath(
                                    cls.XP_P_NAME).extract()
                                statement_type = _clean(
                                    row_selector.xpath(
                                        cls.XP_T_TYPE).extract()[0])
                                protocol_link = row_selector.xpath(
                                    cls.XP_PROT_LINK).extract()
                                protocol_text = _clean(
                                    remove_tags(
                                        row_selector.xpath(
                                            cls.XP_PROT_TEXT).extract()[0],
                                        'td a'))
                                statements.append({
                                    'index': index,
                                    'person_source_link': person_source_link,
                                    'person_name': person_name,
                                    'statement_type': statement_type,
                                    'protocol_link': protocol_link,
                                    'protocol_text': protocol_text,
                                })
                            except Exception:
                                logger.error(
                                    "Skipping statement '{}' due to extraction error"
                                    .format(row_selector.extract()))
                                continue
                        title = {
                            'text': u'Wortmeldungen in der Debatte',
                            'statements': statements
                        }
                    else:
                        text = _clean(
                            remove_tags(
                                step_selector.xpath(cls.XPATH).extract()[0],
                                'td')).replace('<a href="',
                                               '<a href="{}'.format(BASE_HOST))
                        title = {'text': text}
                    return title
Example 4
                def xt(cls, step_selector):
                    title_selector = step_selector.xpath('//td[2]')[0]

                    # we have wortmeldungen!
                    if title_selector.xpath('//table'):
                        table_selector = title_selector.xpath('//table')[0]
                        raw_rows = [
                            Selector(text=raw_row)
                            for raw_row
                            in table_selector.xpath('//tbody//tr').extract()]
                        statements = []
                        # Extract statements data
                        for index, row_selector in enumerate(raw_rows):
                            if row_selector.xpath(cls.XP_P_LINK).extract():
                                person_source_link = row_selector.xpath(
                                    cls.XP_P_LINK).extract()[0]
                            else:
                                continue

                            person_name = row_selector.xpath(
                                cls.XP_P_NAME).extract()
                            if row_selector.xpath(cls.XP_T_TYPE).extract():
                                statement_type = _clean(
                                    row_selector.xpath(cls.XP_T_TYPE).extract()[0])
                            else:
                                continue
                            protocol_link = row_selector.xpath(
                                cls.XP_PROT_LINK).extract()
                            if row_selector.xpath(cls.XP_PROT_TEXT).extract():
                                protocol_text = _clean(
                                    remove_tags(
                                        row_selector.xpath(
                                            cls.XP_PROT_TEXT).extract()[0],
                                        'td a'))
                            else:
                                protocol_text = []
                            statements.append({
                                'index': index,
                                'person_source_link': person_source_link,
                                'person_name': person_name,
                                'statement_type': statement_type,
                                'protocol_link': protocol_link,
                                'protocol_text': protocol_text,
                            })
                        title = {
                            'text': u'Wortmeldungen in der Debatte',
                            'statements': statements
                        }
                    else:
                        text = _clean(
                            remove_tags(
                                step_selector.xpath(
                                    cls.XPATH).extract()[0],
                                'td')).replace(
                                    '<a href="',
                                    '<a href="{}'.format(BASE_HOST))
                        title = {'text': text}
                    return title
Example 5
            def xt(cls, response):
                mandates_raw = response.xpath(cls.XPATH).extract()
                mandates = []
                for mandate in mandates_raw:
                    mandate = _clean(remove_tags(mandate, 'li'))

                    if "<div" in mandate and "</div>" in mandate:
                        mandate = _clean(
                            remove_tags(
                                Selector(
                                    text=mandate).xpath("//div").extract()[0],
                                'div'))

                    function = mandate.split(u'<br>')[0].split(',')[0]
                    party = mandate.split(u'<br>')[0].split(',')[1]

                    # Start Date
                    try:
                        start_date = _clean(
                            mandate.split('<br>')[1].split(u'\u2013')[0])

                        start_date = datetime.datetime.strptime(
                            start_date, "%d.%m.%Y").date()
                    except (IndexError, ValueError):
                        # Log the raw mandate: start_date may be unbound here.
                        logger.error(
                            u"Failed to parse mandate start date: {}".format(
                                mandate))
                        start_date = None

                    # End Date
                    try:
                        end_date = mandate.split('<br>')[1].split(u'\u2013')
                        if len(end_date) > 1 and end_date[1]:
                            end_date = datetime.datetime.strptime(
                                _clean(end_date[1]), "%d.%m.%Y").date()
                        else:
                            end_date = None
                    except (IndexError, ValueError):
                        logger.error(
                            u"Failed to parse mandate end date: {}".format(
                                mandate))
                        end_date = None

                    mandates.append({
                        'function': function,
                        'party': _clean(party),
                        'start_date': start_date,
                        'end_date': end_date,
                    })

                return mandates
Example 6
        def xt(cls, response):
            persons = []
            raw_persons = response.xpath(cls.XPATH).extract()
            for raw_person in raw_persons:
                person = Selector(text=raw_person)
                if person.xpath('//th'):
                    continue
                source_link = person.xpath(
                    '//td//a/@href').extract()[0]
                reversed_name = _clean(
                    Selector(
                        text=remove_tags(raw_person, 'img')
                    ).xpath('//td//a/text()').extract()[0])

                (pres_start_date, pres_end_date) = cls.xt_pres_date(
                    raw_person)

                mandate = {
                    'title': u'RechnungshofpräsidentIn',
                    'short': u'RH-PräsidentIn',
                    'start_date': pres_start_date,
                    'end_date': pres_end_date
                }
                persons.append({
                    'source_link': source_link,
                    'reversed_name': reversed_name,
                    'mandate': mandate,
                })

            return persons
Example 7
def bbs_pub(request):
    categories = Category.objects.all()
    hashkey = CaptchaStore.generate_key()
    image_url = captcha_image_url(hashkey)
    if request.method == 'POST':
        form = BbsPubForm(request.POST)

        if form.is_valid():
            cd = form.cleaned_data
            bbsBiz = BbsBiz()
            bbs_category = bbsBiz.getCategory(cd['bbs_category'])
            bbs_author = bbsBiz.getBbsAuthorByReq(request.user)

            if bbs_category and bbs_author:
                # Strip unsafe tags from the submitted content.
                bbs_content = remove_tags(cd['bbs_content'], "html body script")
                BBS.objects.create(
                    bbs_title=cd['bbs_title'],
                    bbs_content=bbs_content,
                    view_count=0,
                    bbs_category=bbs_category,
                    bbs_author=bbs_author,
                )
                return HttpResponseRedirect(reverse('home'))
        return render_to_response("bbs_pub.html",
                                  {"form": form, "categories": categories,
                                   "hashkey": hashkey, "image_url": image_url},
                                  context_instance=RequestContext(request))

    form = BbsPubForm()
    return render_to_response("bbs_pub.html",
                              {"form": form, "categories": categories,
                               "hashkey": hashkey, "image_url": image_url},
                              context_instance=RequestContext(request))
Example 8
 def xt(cls, response):
     description = response.xpath(cls.XPATH).extract()
     if description:
         description = description[0]
     else:
         description = u""
     return remove_tags(description, 'p')
Example 9
def cockpit_page_result_list(cl):
    """
    Displays the headers and data list together
    Replaces admin template tag "result_list". Constructs page list according to the
    hierarchical structure.
    """
    headers = list(result_headers(cl))
    num_sorted_fields = 0
    for h in headers:
        if h['sortable'] and h['sorted']:
            num_sorted_fields += 1

    page_results = cl.result_list
    ordered_results = create_ordered_page_list(page_results)
    cl.result_list = ordered_results['ordered_list']
    hierarchy_levels = ordered_results['hierarchy_levels']
    list_results = list(results(cl))

    # Hierarchical indentation
    i = 0
    for result in list_results:
        result[1] = remove_tags(result[1], "th")
        result[1] = mark_safe(u"<th style='padding-left: %dpx;'>%s</th>" % (5 + hierarchy_levels[i] * 20, result[1]))
        i += 1
    return {'cl': cl,
            'result_hidden_fields': list(result_hidden_fields(cl)),
            'result_headers': headers,
            'num_sorted_fields': num_sorted_fields,
            'results': list_results}
Example 10
 def xt(cls, response):
     try:
         description = response.xpath(cls.XPATH)[0].extract()[0]
     except IndexError:
         import ipdb
         ipdb.set_trace()
     return remove_tags(description, 'p')
Example 11
 def xt(cls, response):
     description = response.xpath(cls.XPATH).extract()
     if description:
         description = description[0]
     else:
         description = u""
     description_nowhitespace = re.sub(r'\s+', ' ', description)
     return remove_tags(description_nowhitespace, 'p').strip()
Example 12
        def xt(cls, response):
            persons = []
            raw_persons = response.xpath(cls.XPATH).extract()
            for raw_person in raw_persons:
                person = Selector(text=raw_person)
                if person.xpath('//th'):
                    continue
                source_link = person.xpath(
                    '//td//a/@href').extract()[0]
                reversed_name = _clean(
                    Selector(
                        text=remove_tags(raw_person, 'img')
                    ).xpath('//td//a/text()').extract()[0])
                if ' siehe ' in reversed_name:
                    reversed_name = reversed_name.split(' siehe ')[1]
                admin_title = person.xpath(
                    '//td[1]/span/text()').extract()

                (admin_start_date, admin_end_date) = cls.xt_admin_date(
                    raw_person)

                administration = {
                    'title': admin_title,
                    'start_date': admin_start_date,
                    'end_date': admin_end_date
                }
                # TODO EXTRACT DATE(S) FROM BUNDESMINISTERIUM td
                # TODO ADD EITHER DATE(S) TO FUNCTION
                try:
                    if person.xpath('//tr//td[3]/span/text()'):
                        function_short = person.xpath(
                            '//td[3]/span/text()').extract()[0]
                        function_title = person.xpath(
                            '//td[3]/span/@title').extract()[0]

                    elif person.xpath('//tr//td[3]/text()'):
                        function_short = _clean(person.xpath(
                            '//td[3]/text()').extract()[0])
                        function_title = ''
                except IndexError:
                    import ipdb
                    ipdb.set_trace()
                mandate = {
                    'short': function_short,
                    'title': function_title,
                    'administration': administration}

                persons.append({
                    'source_link': source_link,
                    'reversed_name': reversed_name,
                    'mandate': mandate,
                })

            return persons
Example 13
def remove_tags_and_comments():
  global s
  htmlcomments = re.compile(r'<![ \r\n\t]*(--([^\-]|[\r\n]|-[^\-])*--[ \r\n\t]*)>')

  #print 'BEFORE:', s
  s = remove_tags(s, 'table thead tfoot tbody td tr th font TABLE THEAD TFOOT TBODY TD TR TH FONT center CENTER EM em span SPAN')  # p br div P BR DIV
  s = htmlcomments.sub('', s)
  s = s.strip('\r\n\t')
  s = s.replace('&nbsp;', '')
  #s, errs = tidy_fragment(s, options={'indent': 1, 'wrap': 120, 'merge-divs': 'yes'})
  #if trace: print errs  # 'errs' only exists when the tidy_fragment call above is enabled

  return True
Example 14
 def get(self, request, slot):
     check_schedule_view(request)
     try:
         slot_id = int(slot)
         slot = get_object_or_404(Slot, pk=slot_id)
         if slot.content_ptr.slug:
             return redirect(slot.get_absolute_url(), permanent=True)
     except ValueError:
         slot = get_object_or_404(Slot, content_ptr__slug=slot)
     data = {
         "slot": slot,
         "biography": mark_safe(remove_tags(slot.content.speaker.biography.rendered, 'script'))
     }
     return render(request, self.template_name, data)
Example 15
def format_option(value):

    value = value.replace('</p>', '<br>')
    value = bleach.clean(value, allowed_tags,
                         allowed_attributes,
                         allowed_styles,
                         strip=True)

    soup = BeautifulSoup(value, 'html.parser')
    for tag in soup.find_all('img'):
        tag['style'] = 'vertical-align:middle'

    output = soup
    if soup.body:
        # Strip the wrapping <body> tag but keep its contents.
        body = get_content(soup)
        output = remove_tags(unicode(body), "body")

    return unicode(output)
Example 16
def replays(request, bbs_id):
    if request.is_ajax():
        form = ReplayForm(request.POST)
        content = request.POST.get('content', None)
        # Filter out unsafe tags
        content = remove_tags(content, "script html body")
        if form.is_valid() and content and bbs_id:
            Comments.objects.create(
                user_id=BBS_user.objects.get(user__username=request.user),
                bbs_id=BBS.objects.get(id=bbs_id),
                pub_date=datetime.datetime.now(),
                cmt_content=content,
            )
            cur_user = BBS_user.objects.get(user__username=request.user)
            # Pass the user's avatar URL along for the AJAX response
            avatar = unicode(cur_user.avatar)
            return HttpResponse(json.dumps(
                {"content": content,
                 "avatar": avatar,
                 "signature": cur_user.signature}))
        return None

    return HttpResponseRedirect(reverse("home"))
Example 17
def removetags(value, tags):
    """Removes a space separated list of [X]HTML tags from the output."""
    from django.utils.html import remove_tags
    return remove_tags(value, tags)
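Note: remove_tags was deprecated in Django 1.8 and removed in Django 1.10, so filters like the one above no longer work on current Django. A minimal stand-in with the same space-separated signature might look like the sketch below; it is an approximation, not the exact upstream code:

import re

def remove_tags(html, tags):
    # Strip the listed (space-separated) tags from html, keeping their
    # text contents. A sketch of the helper removed from
    # django.utils.html, not the original implementation.
    tags = [re.escape(tag) for tag in tags.split()]
    tags_re = '(%s)' % '|'.join(tags)
    starttag_re = re.compile(r'<%s(/?>|(\s+[^>]*>))' % tags_re, re.U)
    endtag_re = re.compile('</%s>' % tags_re)
    html = starttag_re.sub('', html)
    html = endtag_re.sub('', html)
    return html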
Example 18
def removetags(value, tags):
    """Removes a space separated list of [X]HTML tags from the output."""
    return remove_tags(value, tags)
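For reference, the filter strips only the tags named in the space-separated list and leaves everything else intact, including the text inside the removed tags. A quick sketch, using the sample string from the old Django documentation:

>>> remove_tags('<b>Joel</b> <button>is</button> a <span>slug</span>', 'b span')
'Joel <button>is</button> a slug'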
Example 19
 def __unicode__(self):
     try:
         return remove_tags(self.title, 'a')
     except TypeError:
         # self.title may be None (or otherwise not a string)
         return self.title
Example 20
 def remove_p_and_br(value):
     return remove_tags(value, 'p br')
Example 21
def createJsonValues(request, secret_key):

    now = datetime.datetime.now()
    today_dt = now.strftime("%d%m%Y")
    mystring = 'businessworld'
    new_string = today_dt + mystring
    #return HttpResponse(new_string)
    hash_object = hashlib.md5(new_string.encode())
    my_secret_key = hash_object.hexdigest()

    if my_secret_key == secret_key:

        recent_articles = Articles.objects.raw(
            "SELECT A.*, AU.*, AI.image_url, AI.photopath, AV.video_embed_code FROM articles A LEFT JOIN article_author A_A ON A.article_id = A_A.article_id LEFT JOIN author AU ON A_A.author_id = AU.author_id JOIN article_images AI ON AI.article_id = A.article_id LEFT JOIN article_video AV ON A.article_id = AV.article_id WHERE A.display_to_homepage = '1' AND AI.photopath !='' GROUP BY A.article_id ORDER BY A.article_published_date DESC LIMIT 8"
        )
        recent_articles_json = []
        for article in recent_articles:
            art_elem = {}
            art_elem['article_title'] = article.article_title
            art_elem['article_published_date'] = str(
                article.article_published_date)
            art_elem['article_id'] = article.article_id
            art_elem['photopath'] = article.photopath
            art_elem['video_type'] = article.video_type
            art_elem['important_article'] = article.important_article
            art_elem['absolute_url'] = article.get_absolute_url()
            recent_articles_json.append(art_elem)

        bwtv_articles = VideoMaster.objects.raw(
            "SELECT * FROM video_master ORDER BY video_id DESC LIMIT 6")
        bwtv_articles_json = []
        for article in bwtv_articles:
            art_elem = {}
            art_elem['video_title'] = article.video_title
            art_elem['video_thumb_name'] = article.video_thumb_name
            art_elem['absolute_url'] = article.get_absolute_url()
            bwtv_articles_json.append(art_elem)

        recent_important_article = Articles.objects.raw(
            "SELECT A.*, AU.*, AI.image_url, AI.photopath FROM articles A LEFT JOIN article_author A_A ON A.article_id = A_A.article_id LEFT JOIN author AU ON A_A.author_id = AU.author_id LEFT JOIN article_images AI ON A.article_id = AI.article_id WHERE A.display_to_homepage = '1' AND A.important_article = '1' GROUP BY A.article_id ORDER BY A.article_published_date DESC LIMIT 1"
        )
        recent_important_article_json = []
        for article in recent_important_article:
            art_elem = {}
            art_elem['article_title'] = article.article_title
            art_elem['article_published_date'] = str(
                article.article_published_date)
            art_elem['article_id'] = article.article_id
            art_elem['photopath'] = article.photopath
            art_elem['video_type'] = article.video_type
            art_elem['article_summary'] = article.article_summary
            art_elem['author_name'] = article.author_name
            author_url = article.get_article_author_url()
            art_elem['author_url'] = author_url
            art_elem['absolute_url'] = article.get_absolute_url()
            recent_important_article_json.append(art_elem)

        recent_exclusive_article = Articles.objects.raw(
            "SELECT A.*, AI.image_url, AI.photopath FROM articles A LEFT JOIN article_images AI ON A.article_id = AI.article_id WHERE A.is_exclusive = '1' GROUP BY A.article_id ORDER BY A.article_published_date DESC LIMIT 4"
        )
        recent_exclusive_article_json = []
        for article in recent_exclusive_article:
            art_elem = {}
            art_elem['article_title'] = article.article_title
            art_elem['article_published_date'] = str(
                article.article_published_date)
            art_elem['article_id'] = article.article_id
            art_elem['photopath'] = article.photopath
            art_elem['video_type'] = article.video_type
            art_elem['article_summary'] = article.article_summary
            author_url = article.get_article_author_url()
            art_elem['author_url'] = author_url
            art_elem['absolute_url'] = article.get_absolute_url()
            recent_exclusive_article_json.append(art_elem)

        column_articles = Articles.objects.raw(
            "SELECT A.*, AU.*, AI.image_url, AI.photopath FROM articles A LEFT JOIN article_author A_A ON A.article_id = A_A.article_id LEFT JOIN author AU ON A_A.author_id = AU.author_id LEFT JOIN article_images AI ON A.article_id = AI.article_id WHERE A_A.author_type='4' GROUP BY A.article_id ORDER BY A.article_published_date DESC LIMIT 4"
        )
        column_articles_json = []
        for article in column_articles:
            art_elem = {}
            art_elem['article_title'] = article.article_title
            art_elem['article_published_date'] = str(
                article.article_published_date)
            art_elem['article_id'] = article.article_id
            art_elem['photopath'] = article.photopath
            art_elem['video_type'] = article.video_type
            art_elem['article_summary'] = article.article_summary
            art_elem['author_name'] = article.author_name
            author_url = article.get_article_author_url()
            art_elem['author_url'] = author_url
            art_elem['absolute_url'] = article.get_absolute_url()
            column_articles_json.append(art_elem)

        columnist = Author.objects.raw(
            "SELECT * FROM (SELECT AU.*, AR.article_published_date FROM author AU INNER JOIN article_author ARU ON AU.author_id = ARU.author_id INNER JOIN articles AR ON ARU.article_id = AR.article_id WHERE AU.author_type='4' ORDER BY AR.article_published_date DESC) AS tem GROUP BY tem.author_id ORDER BY article_published_date DESC LIMIT 9"
        )
        columnist_json = []
        for article in columnist:
            art_elem = {}
            art_elem['author_photo'] = article.author_photo
            art_elem['author_name'] = article.author_name
            art_elem['absolute_url'] = article.get_absolute_url()
            columnist_json.append(art_elem)

        sidebar_recent_articles = Articles.objects.raw(
            "SELECT A.*, AU.*, AI.image_url, AI.photopath, AV.video_embed_code FROM articles A LEFT JOIN article_author A_A ON A.article_id = A_A.article_id LEFT JOIN author AU ON A_A.author_id = AU.author_id JOIN article_images AI ON AI.article_id = A.article_id LEFT JOIN article_video AV ON A.article_id = AV.article_id WHERE A.display_to_homepage = '1' AND AI.photopath !='' GROUP BY A.article_id ORDER BY A.article_published_date DESC LIMIT 10"
        )
        sidebar_recent_articles_json = []
        for article in sidebar_recent_articles:
            art_elem = {}
            art_elem['article_title'] = article.article_title
            art_elem['article_published_date'] = str(
                article.article_published_date)
            art_elem['article_id'] = article.article_id
            art_elem['photopath'] = article.photopath
            art_elem['absolute_url'] = article.get_absolute_url()
            sidebar_recent_articles_json.append(art_elem)

        recent_articles_interview = Articles.objects.raw(
            "SELECT A.*, AU.*, AI.image_url, AI.photopath FROM articles A LEFT JOIN article_author A_A ON A.article_id = A_A.article_id LEFT JOIN author AU ON A_A.author_id = AU.author_id LEFT JOIN article_images AI ON A.article_id = AI.article_id WHERE A.display_to_homepage = '1' AND A.article_type = 3  AND AI.photopath !='' GROUP BY A.article_id ORDER BY A.article_published_date DESC LIMIT 1, 6"
        )
        recent_articles_interview_json = []
        for article in recent_articles_interview:
            art_elem = {}
            art_elem['article_title'] = article.article_title
            art_elem['article_published_date'] = str(
                article.article_published_date)
            art_elem['article_id'] = article.article_id
            art_elem['photopath'] = article.photopath
            art_elem['video_type'] = article.video_type
            art_elem['absolute_url'] = article.get_absolute_url()
            recent_articles_interview_json.append(art_elem)

        recent_important_article_interview = Articles.objects.raw(
            "SELECT A.*, AU.*, AI.image_url, AI.photopath FROM articles A LEFT JOIN article_author A_A ON A.article_id = A_A.article_id LEFT JOIN author AU ON A_A.author_id = AU.author_id LEFT JOIN article_images AI ON A.article_id = AI.article_id WHERE A.display_to_homepage = '1'  AND A.article_type = 3 AND AI.photopath !='' GROUP BY A.article_id ORDER BY A.article_published_date DESC LIMIT 1"
        )
        recent_important_article_interview_json = []
        for article in recent_important_article_interview:
            art_elem = {}
            art_elem['article_title'] = article.article_title
            art_elem['article_published_date'] = str(
                article.article_published_date)
            art_elem['article_id'] = article.article_id
            art_elem['photopath'] = article.photopath
            art_elem['video_type'] = article.video_type
            art_elem['article_summary'] = article.article_summary
            art_elem['author_name'] = article.author_name
            author_url = article.get_article_author_url()
            art_elem['author_url'] = author_url
            art_elem['absolute_url'] = article.get_absolute_url()
            recent_important_article_interview_json.append(art_elem)

        category_jumlist = ChannelCategory.objects.filter(category_parent='0')
        category_jumlist_json = []
        for article in category_jumlist:
            art_elem = {}
            art_elem['category_name'] = article.category_name
            art_elem['absolute_url'] = article.category_self_url()
            category_jumlist_json.append(art_elem)

        author_list_on_home_page = Author.objects.raw(
            "SELECT * FROM (SELECT AU.*, AR.article_published_date, NWS.newsletter_counts FROM author AU INNER JOIN article_author ARU ON AU.author_id = ARU.author_id INNER JOIN articles AR ON ARU.article_id = AR.article_id LEFT JOIN (SELECT author_newsletter_type_id, COUNT(author_newsletter_type_id) AS newsletter_counts FROM author_newsletter_Subscriber GROUP BY author_newsletter_type_id)NWS ON AU.author_id = NWS.author_newsletter_type_id WHERE AU.author_type='4'  OR AU.author_type='3'  ORDER BY AR.article_published_date DESC) AS tem GROUP BY tem.author_id ORDER BY article_published_date desc LIMIT 6"
        )
        author_list_on_home_page_json = []
        for article in author_list_on_home_page:
            art_elem = {}
            art_elem['author_name'] = article.author_name
            art_elem['newsletter_counts'] = str(article.newsletter_counts)
            art_elem['author_photo'] = article.author_photo
            art_elem['absolute_url'] = article.get_absolute_url()
            author_list_on_home_page_json.append(art_elem)

        bwtv_articles = Articles.objects.raw(
            "SELECT A.*, AC.*, AI.image_url, AI.photopath FROM articles A LEFT JOIN  article_category AC ON A.article_id = AC.article_id LEFT JOIN article_images AI ON A.article_id = AI.article_id  WHERE  AC.category_id = '156' ORDER BY A.article_published_date DESC LIMIT 10"
        )
        bwtv_articles_json = []
        for article in bwtv_articles:
            art_elem = {}
            art_elem['video_title'] = article.video_title
            art_elem['article_published_date'] = str(
                article.article_published_date)
            art_elem['video_thumb_name'] = article.video_thumb_name
            art_elem['absolute_url'] = article.get_absolute_url()
            bwtv_articles_json.append(art_elem)

        magazine_image = Magazine.objects.raw(
            "SELECT *,YEAR(publish_date_m) as years FROM magazine ORDER BY publish_date_m DESC LIMIT 1"
        )
        magazine_image_json = []
        for article in magazine_image:
            art_elem = {}
            art_elem['description'] = article.description
            art_elem['imagepath'] = article.imagepath
            art_elem['story1_url'] = article.story1_url
            art_elem['story1_title'] = article.story1_title
            art_elem['story2_url'] = article.story2_url
            art_elem['story2_title'] = article.story2_title
            art_elem['story3_url'] = article.story3_url
            art_elem['story3_title'] = article.story3_title
            art_elem['story4_url'] = article.story4_url
            art_elem['story4_title'] = article.story4_title
            art_elem['story5_url'] = article.story5_url
            art_elem['story5_title'] = article.story5_title
            art_elem['flipbook_url'] = article.flipbook_url
            art_elem['absolute_url'] = article.get_absolute_url()
            art_elem['years'] = article.years
            magazine_image_json.append(art_elem)

        photoshoot_listing = PhotoShoot.objects.raw(
            "SELECT count(*) as counts, ps.*,psp.photo_shoot_photo_url, psp.photo_shoot_image_id , psp.photo_shoot_photo_name FROM photo_shoot_photos psp join photo_shoot ps on  psp.photo_shoot_id=ps.photo_shoot_id group by psp.photo_shoot_id ORDER BY  ps.photo_shoot_id DESC  LIMIT 0,5"
        )
        photoshoot_listing_json = []
        for article in photoshoot_listing:
            art_elem = {}
            art_elem['photo_shoot_title'] = article.photo_shoot_title
            art_elem['photo_shoot_photo_name'] = article.photo_shoot_photo_name
            art_elem['counts'] = article.counts
            art_elem['absolute_url'] = article.get_absolute_url()
            photoshoot_listing_json.append(art_elem)

        #Rest of the site Sidebar data

        client = storage.Client()
        bucket = client.get_bucket('bwmedia')
        blob_homepage = bucket.get_blob(
            'json-files/bwdiff/homepage_site_data.json')
        #print(blob_homepage.download_as_string())
        blob_homepage.upload_from_string(
            json.dumps({
                'recent_articles': recent_articles_json,
                'sidebar_recent_articles': sidebar_recent_articles_json,
                'bwtv_articles': bwtv_articles_json,
                'recent_important_article': recent_important_article_json,
                'column_articles': column_articles_json,
                'columnist': columnist_json,
                'recent_articles_interview': recent_articles_interview_json,
                'recent_important_article_interview':
                recent_important_article_interview_json,
                'category_jumlist': category_jumlist_json,
                'author_list_on_home_page': author_list_on_home_page_json,
                # 'bw_corporate_movement' and 'featured_boxs' referenced
                # variables that are never built in this view; they are
                # omitted here to avoid a NameError.
                'magazine_image': magazine_image_json,
                'recent_exclusive_article': recent_exclusive_article_json,
                'photoshoot_listing': photoshoot_listing_json,
            }))

        blob_sidebar = bucket.get_blob(
            'json-files/bw-bwdiff/sidebar_site_data.json')
        blob_sidebar.upload_from_string(
            json.dumps({
                'sidebar_recent_articles':
                sidebar_recent_articles_json[:6],
                'bwtv_articles':
                bwtv_articles_json[:6],
                'category_jumlist':
                category_jumlist_json,
            }))

        feeds_bwcio = feedparser.parse(
            'http://bwcio.businessworld.in/rss/all-article.xml')
        feeds_bws = feedparser.parse(
            'http://bwsmartcities.businessworld.in/rss/channel-feed-articles.xml'
        )
        feeds_bwh = feedparser.parse(
            'http://businessworld.in/rss/latest-article.xml')
        feeds_bwd = feedparser.parse(
            'http://bwdisrupt.businessworld.in/rss/channel-feed-articles.xml')
        feeds_ever = feedparser.parse(
            'http://everythingexperiential.businessworld.in/rss/channel-feed-articles.xml'
        )
        feeds_bwwh = feedparser.parse(
            'http://bwwealth.businessworld.in/rss/all-article.xml')
        feeds_bma = feedparser.parse('http://www.digitalmarket.asia/feed/')

        feeds_bwcio_json = []
        feeds_bws_json = []
        feeds_bwh_json = []
        feeds_bwd_json = []
        feeds_ever_json = []
        feeds_bwwh_json = []
        feeds_bma_json = []

        for entry in feeds_bwcio.entries:
            #return HttpResponse(entry.link)
            json_feed = {}
            json_feed['link'] = entry.link
            json_feed['title'] = entry.title
            feeds_bwcio_json.append(json_feed)

        for entry in feeds_bws.entries:
            json_feed = {}
            json_feed['link'] = entry.link
            json_feed['title'] = entry.title
            feeds_bws_json.append(json_feed)

        for entry in feeds_bwh.entries:
            json_feed = {}
            json_feed['link'] = entry.link
            json_feed['title'] = entry.title
            feeds_bwh_json.append(json_feed)

        for entry in feeds_bwd.entries:
            json_feed = {}
            json_feed['link'] = entry.link
            json_feed['title'] = entry.title
            feeds_bwd_json.append(json_feed)

        for entry in feeds_ever.entries:
            json_feed = {}
            json_feed['link'] = entry.link
            json_feed['title'] = entry.title
            feeds_ever_json.append(json_feed)

        for entry in feeds_bwwh.entries:
            json_feed = {}
            json_feed['link'] = entry.link
            json_feed['title'] = entry.title
            feeds_bwwh_json.append(json_feed)

        for entry in feeds_bma.entries:
            json_feed = {}
            json_feed['link'] = entry.link
            json_feed['title'] = entry.title
            feeds_bma_json.append(json_feed)

        blob_footer_community = bucket.get_blob(
            'json-files/bw-bwdiff/footer_community_site_data.json')
        blob_footer_community.upload_from_string(
            json.dumps({
                'feeds_bwcio': feeds_bwcio_json,
                'feeds_bws': feeds_bws_json,
                'feeds_bwh': feeds_bwh_json,
                'feeds_bwd': feeds_bwd_json,
                'feeds_ever': feeds_ever_json,
                'feeds_bwwh': feeds_bwwh_json,
                'feeds_bma': feeds_bma_json,
                'recent_exclusive_article': recent_exclusive_article_json,
                'column_articles': column_articles_json,
            }))

        #Dow Jones XML generation
        #urlset = ET.Element('xml', version="1.0", encoding="UTF-8")
        nodes = ET.Element('nodes')

        html_parser = HTMLParser.HTMLParser()

        #50 article list without BW Online for yahoo and dow jones
        article_list = Articles.objects.raw(
            "SELECT A.* FROM articles A LEFT JOIN article_author A_A ON A.article_id = A_A.article_id WHERE A_A.author_type != 1 GROUP BY A.article_id ORDER BY A.article_published_date DESC LIMIT 50"
        )

        for article in article_list:
            node = ET.SubElement(nodes, "node")
            ET.SubElement(node, "link").text = (
                'http://bwhotelier.businessworld.in/' +
                article.get_absolute_url())
            ET.SubElement(node, "title").text = html_parser.unescape(
                article.article_title)

            # Materialize the listing once so the emptiness check does not
            # exhaust a generator before iteration.
            categories = list(article.get_article_category_listing())
            if categories:
                categories_list = ET.SubElement(node, "categories")
                for category in categories:
                    ET.SubElement(categories_list, "category").text = category
            ET.SubElement(node, "description").text = html_parser.unescape(
                article.article_summary)
            ET.SubElement(node, "author").text = html_parser.unescape(
                article.get_article_author_name())
            article_desc = remove_tags(article.article_description, "p")
            ET.SubElement(node, "body").text = article_desc
            ET.SubElement(node, "post-date").text = str(
                article.article_published_date.strftime(
                    '%a, %Y %b %d %H:%M:%S %Z'))

        tree = ET.tostring(nodes)
        tree = re.sub(r'&lt;', '<', tree)
        tree = re.sub(r'&gt;', '>', tree)
        with open('static/xml/dow_jones_article.xml', 'w+') as out:
            out.write(tree)

        return HttpResponse(json.dumps({'result': 'completed'}))
    else:
        return HttpResponseRedirect("/")
Example 22
 def xt(cls, response):
     status = remove_tags(
         response.xpath(cls.XPATH).extract()[0], 'em img p')
     status = status.replace('Status: ', '')
     return status
Example 23
            def xt(cls, response):
                mandates_raw = response.xpath(cls.XPATH).extract()
                mandates = []
                for mandate in mandates_raw:
                    mandate = _clean(remove_tags(mandate, 'li'))

                    if "<div" in mandate and "</div>" in mandate:
                        mandate = _clean(remove_tags(
                            Selector(text=mandate).xpath("//div").extract()[0],
                            'div'))

                    function = mandate.split(u'<br>')[0].split(',')[0]
                    party = (mandate.split(u'<br>')[0].split(',')[1]
                             if ',' in mandate.split(u'<br>')[0] else '')
                    llp_raw = re.match(r'^.*\((.*)\. GP\).*$', function)
                    function = re.sub(r'\((.*)\. GP\)', '', function).strip()

                    # A mandate can span a range of legislative periods
                    # (GPs), given as roman numerals, e.g. "XX. – XXII.".
                    m_llp_roman_begin = m_llp_roman_end = (
                        llp_raw.group(1) if llp_raw else '')

                    if u'–' in m_llp_roman_begin:
                        m_llp_roman_begin, m_llp_roman_end = \
                            m_llp_roman_begin.split(u'–')

                    if m_llp_roman_begin:
                        llps = range(
                            roman.fromRoman(m_llp_roman_begin.strip('. ')),
                            roman.fromRoman(m_llp_roman_end.strip('. ')) + 1)
                    else:
                        llps = [None]

                    for llp in llps:
                        llp_roman = roman.toRoman(llp) if llp else None

                        # Start Date
                        try:
                            start_date = _clean(
                                mandate.split('<br>')[1].split(u'\u2013')[0])

                            start_date = datetime.datetime.strptime(
                                start_date, "%d.%m.%Y").date()
                        except (IndexError, ValueError):
                            # Log the raw mandate: start_date may be unbound.
                            logger.error(
                                u"Failed to parse mandate start date: {}".format(mandate))
                            start_date = None

                        # End Date
                        try:
                            end_date = mandate.split(
                                '<br>')[1].split(u'\u2013')
                            if len(end_date) > 1 and end_date[1]:
                                end_date = datetime.datetime.strptime(
                                    _clean(end_date[1]), "%d.%m.%Y").date()
                            else:
                                end_date = None
                        except (IndexError, ValueError):
                            logger.error(
                                u"Failed to parse mandate end date: {}".format(mandate))
                            end_date = None

                        mandates.append({
                            'function': function,
                            'party': _clean(party),
                            'start_date': start_date,
                            'end_date': end_date,
                            'llp': llp,
                            'llp_roman': llp_roman,
                        })

                return mandates
Example 24
def stripy(node):
    cleaner(node)
    html = tostring(node, encoding='utf-8', method="html",
                    pretty_print=True).decode('utf-8')
    return remove_tags(html, 'div')
Example 25
 def save(self, *args, **kwargs):
     if self.text is not None:
         self.text = remove_tags(self.text, "font span")
     super(Recipe, self).save(*args, **kwargs)
Example 26
def remove_part_tags(html, tags):
    return remove_tags(html, tags)
Example 27
 def __unicode__(self):
     if self.title:
         return remove_tags(self.title, 'a')
     else:
         return self.title
Example 28
	def getContent(self, obj):
		# First, we have to remove the preview tag.
		return remove_tags(obj.content, 'preview')
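Note that remove_tags only strips the <preview> tags themselves; the preview text stays in the returned content. A quick sketch with a hypothetical sample string:

>>> remove_tags('<preview>Intro.</preview> Full story.', 'preview')
'Intro. Full story.'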
Example 29
 def save(self, *args, **kwargs):
     self.body = remove_tags(self.body, "font span")
     super(Post, self).save(*args, **kwargs)
Example 30
 def admin_thumbnail(self):
     thumbnail = remove_tags(self.content, "p br")
     return u"%s" % thumbnail