def createArticle(self, baseUrl, articleData):
    """Build an Article from the page at ``baseUrl + articleData[0]``.

    The page is fetched with ``self.readUrl`` and scraped with regular
    expressions: the ``<h1>`` inside the user-content block becomes the
    title and the ``mp3: "..."`` value from the inline script becomes
    the link.  ``articleData[2]`` is stored as the publication date.

    The description is taken from the first ``<p class="description">``;
    when the page has none, the first ``<p>`` inside a
    ``<div class="content">`` is used instead; otherwise it stays ''.

    Args:
        baseUrl: Prefix prepended to ``articleData[0]`` to form the URL.
        articleData: Indexable record; index 0 is the page path and
            index 2 is the publication date.

    Returns:
        The populated Article, or None when the page does not contain
        the expected title/mp3 markup (same convention as parseArticle).
    """
    data = self.readUrl(baseUrl + articleData[0])

    main_match = re.search(
        r'<div class="main-content">.*?'
        r'<article class="article-content">.*?'
        r'<div class="user-content">.*?'
        r'<h1>(.*?)</h1>.*?'
        r'</article>.*?'
        r'<script type="text/javascript">.*?'
        r'mp3: "(.+?)".*?'
        r'</script>.*?'
        r'</div>',
        data,
        re.DOTALL,
    )
    if main_match is None:
        # Unexpected page layout: report "no article" instead of failing
        # with an IndexError on an empty match list.
        return None

    article = Article()
    article.title = main_match.group(1)
    article.link = main_match.group(2)
    article.pubDate = articleData[2]
    article.description = ''

    # Preferred description source first, generic content paragraph second.
    description_patterns = (
        r'<p class="description">(.+?)</p>',
        r'<div class="content">.*?<p>(.+?)</p>.*?</div>',
    )
    for pattern in description_patterns:
        description_match = re.search(pattern, data, re.DOTALL)
        if description_match is not None:
            article.description = description_match.group(1)
            break

    return article
def parseArticle(self, baseUrl, link):
    """Scrape the news-article page at ``link`` into an Article.

    The page is fetched with ``self.readUrl`` and matched against one
    regular expression that captures, in order: the text following the
    "EMITIRANO" label, the track title, the attachment href, the file
    size and the first paragraph of the blockquote.

    Args:
        baseUrl: Prefix prepended to the captured attachment href.
        link: URL of the article page to fetch.

    Returns:
        An Article with pubDate, title, link and description set (each
        stripped of surrounding whitespace), or None when the page does
        not match the expected markup.
    """
    page = self.readUrl(link)

    pattern = (
        r'<article class="news-article">.*?'
        r'<div class="article-info">.*?'
        r'<div class="col-sm-3 col-xs-6">.*?'
        r'<strong>EMITIRANO</strong>:<br>(.+?)</div>.*?'
        r'</div>.*?'
        r'<div id="jplayer_container" class="audio-player played repeat-on">.*?'
        r'<div class="track-info">.*?'
        r'<p class="track-title">(.+?)</p>.*?'
        r'<div class="download-section">.*?'
        r'<h4>Preuzmite datoteku</h4>.*?'
        r'<a href="(.+?)" class="attachment-file">.*?'
        r'<span class="file-size pull-right">(.+?)</span>.*?'
        r'</a>.*?'
        r'</div>.*?'
        r'<blockquote>.*?'
        r'<h3>.*?</h3>.*?'
        r'<p>(.+?)</p>.*?'
        r'</blockquote>.*?'
        r'</article>'
    )
    hit = re.search(pattern, page, re.DOTALL)
    if hit is None:
        return None

    # The fourth capture (file size) is matched but not stored on the article.
    emitted, track_title, href, _file_size, summary = hit.groups()

    article = Article()
    article.pubDate = emitted.strip()
    article.title = track_title.strip()
    article.link = baseUrl + href.strip()
    article.description = summary.strip()
    return article