Example #1
0
    def createArticle(self, baseUrl, articleData):
        """Build an ``Article`` from the page referenced by ``articleData``.

        The page at ``baseUrl + articleData[0]`` is fetched with
        ``self.readUrl`` and scanned for the heading and the ``mp3`` URL
        embedded in the player script.

        Args:
            baseUrl: Prefix prepended to ``articleData[0]`` to form the URL.
            articleData: Indexable record; element 0 is appended to
                ``baseUrl`` and element 2 is stored as ``pubDate``.

        Returns:
            An ``Article`` with ``title`` (the ``<h1>`` text), ``link`` (the
            ``mp3`` value), ``pubDate`` and ``description`` set, or ``None``
            when the page does not contain the expected title/mp3 markup.
        """
        data = self.readUrl(baseUrl + articleData[0])

        headerMatch = re.search(
            r'<div class="main-content">.*?<article class="article-content">.*?<div class="user-content">.*?<h1>(.*?)</h1>.*?</article>.*?<script type="text/javascript">.*?mp3: "(.+?)".*?</script>.*?</div>',
            data, re.DOTALL)

        # Without this guard a page with unexpected markup used to blow up
        # with a bare IndexError; report "nothing parsed" instead.
        if headerMatch is None:
            return None

        article = Article()
        article.title = headerMatch.group(1)
        article.link = headerMatch.group(2)
        article.pubDate = articleData[2]
        article.description = ''

        # Prefer the dedicated description paragraph; otherwise fall back to
        # the first paragraph inside the content div. If neither is present
        # the description stays empty.
        descriptionPatterns = (
            r'<p class="description">(.+?)</p>',
            r'<div class="content">.*?<p>(.+?)</p>.*?</div>',
        )
        for pattern in descriptionPatterns:
            descriptionMatch = re.search(pattern, data, re.DOTALL)
            if descriptionMatch:
                article.description = descriptionMatch.group(1)
                break

        return article
Example #2
0
    def parseArticle(self, baseUrl, link):
        """Fetch an article page and extract its metadata into an ``Article``.

        The page at ``link`` is downloaded with ``self.readUrl`` and matched
        against one pattern describing the expected article markup.

        Args:
            baseUrl: Prefix prepended to the download href found on the page.
            link: URL of the article page to fetch.

        Returns:
            An ``Article`` with ``pubDate``, ``title``, ``link`` and
            ``description`` set (each whitespace-stripped), or ``None`` when
            the page does not match the expected markup.
        """
        page = self.readUrl(link)

        # Capture groups, in order: text after the "EMITIRANO" label, track
        # title, attachment href, file-size text, first blockquote paragraph.
        found = re.search(
            r'<article class="news-article">.*?<div class="article-info">.*?<div class="col-sm-3 col-xs-6">.*?<strong>EMITIRANO</strong>:<br>(.+?)</div>.*?</div>.*?<div id="jplayer_container" class="audio-player  played repeat-on">.*?<div class="track-info">.*?<p class="track-title">(.+?)</p>.*?<div class="download-section">.*?<h4>Preuzmite datoteku</h4>.*?<a href="(.+?)" class="attachment-file">.*?<span class="file-size pull-right">(.+?)</span>.*?</a>.*?</div>.*?<blockquote>.*?<h3>.*?</h3>.*?<p>(.+?)</p>.*?</blockquote>.*?</article>',
            page,
            re.DOTALL,
        )
        if found is None:
            return None

        # The file-size group is captured by the pattern but not stored.
        emitted, trackTitle, href, _fileSize, summary = found.groups()

        article = Article()
        article.pubDate = emitted.strip()
        article.title = trackTitle.strip()
        article.link = baseUrl + href.strip()
        article.description = summary.strip()
        return article