def parseAndStoreDoc(self):
    """Turn every entry of the parsed feed into an ``Article`` and store it.

    For each entry in ``self.doc.entries``:

    * the title is passed through ``jinja2.Markup(...).unescape()``;
    * the link is the ``href`` of the entry's first ``links`` item;
    * the description is the last ``<p>`` element of the entry's HTML
      description, or the whole parsed description when it has no ``<p>``;
    * the image link is always ``GQ.gqLogo``.

    ``labelName`` is the second ``"_"``-separated piece of ``self.srcName``.
    Each article is saved with ``Article.put()``; one info message is logged
    once the loop has finished.

    Raises:
        IndexError: if ``self.srcName`` contains no ``"_"`` or an entry has
            an empty ``links`` list.
    """
    # Both values are the same for every entry, so compute them once. Binding
    # srcName here also keeps the final log line valid for an empty feed.
    srcName = self.srcName
    labelName = srcName.split("_")[1]

    for entry in self.doc.entries:
        title = jinja2.Markup(entry.title).unescape()
        link = entry.links[0]["href"]
        soup = BeautifulSoup(entry.description)

        # Prefer the last paragraph; fall back to the full description when
        # the markup contains no <p> element at all.
        paragraphs = soup.findAll("p")
        body = paragraphs[-1] if paragraphs else soup
        description = jinja2.Markup(body).unescape()

        # Per-entry image extraction (first <img>'s "src") is disabled in
        # this parser; every article uses the shared GQ logo instead.
        imgLink = GQ.gqLogo

        article = Article(
            title=title,
            description=description,
            imgLink=imgLink,
            link=link,
            srcName=srcName,
            labelName=labelName,
        )
        article.put()

    logging.info("Storing data from %s", srcName)
def parseAndStoreDoc(self):
    """Turn every entry of the parsed feed into an ``Article`` and store it.

    For each entry in ``self.doc.entries``:

    * the title is passed through ``jinja2.Markup(...).unescape()``;
    * the link is the ``href`` of the entry's first ``links`` item;
    * the description is the text content of the entry's HTML description;
    * the image link is the ``src`` of the first ``<img>`` in that
      description, or ``None`` when the description has no image.

    ``labelName`` is the second ``"_"``-separated piece of ``self.srcName``.
    Each article is saved with ``Article.put()``; one info message is logged
    once the loop has finished.

    Raises:
        IndexError: if ``self.srcName`` contains no ``"_"`` or an entry has
            an empty ``links`` list.
    """
    # Both values are the same for every entry, so compute them once. Binding
    # srcName here also keeps the final log line valid for an empty feed.
    srcName = self.srcName
    labelName = srcName.split("_")[1]

    for entry in self.doc.entries:
        title = jinja2.Markup(entry.title).unescape()
        link = entry.links[0]["href"]
        soup = BeautifulSoup(entry.description)
        description = jinja2.Markup(soup.getText()).unescape()

        # An entry without any <img> must not abort the whole import, so
        # store it without an image instead of indexing an empty result.
        # NOTE(review): assumes Article.imgLink accepts None - confirm.
        images = soup.findAll("img")
        imgLink = images[0].get("src") if images else None

        article = Article(
            title=title,
            description=description,
            imgLink=imgLink,
            link=link,
            srcName=srcName,
            labelName=labelName,
        )
        article.put()

    logging.info("Storing data from %s", srcName)
def parseAndStoreDoc(self):
    """Store one ``Article`` per entry of the parsed feed in ``self.doc``.

    Fields are derived from each entry as follows:

    * ``title``: ``entry.title`` run through ``jinja2.Markup(...).unescape()``.
    * ``link``: the ``href`` of the first item in ``entry.links``.
    * ``description``: the text of the HTML in ``entry.description``.
    * ``imgLink``: the ``src`` attribute of the first ``<img>`` found in the
      description, or ``None`` if the description contains no image.
    * ``srcName``: ``self.srcName``.
    * ``labelName``: the part of ``self.srcName`` after its first ``"_"``
      (the second piece of ``split("_")``).

    Every article is saved with ``Article.put()``, and a single info message
    is logged after all entries have been handled.

    Raises:
        IndexError: if ``self.srcName`` contains no ``"_"`` or an entry has
            an empty ``links`` list.
    """
    # Loop-invariant values; srcName is also needed after the loop, so it
    # must exist even when the feed has no entries.
    srcName = self.srcName
    labelName = srcName.split("_")[1]

    for entry in self.doc.entries:
        soup = BeautifulSoup(entry.description)

        # Descriptions without an <img> are stored with no image rather than
        # raising IndexError and skipping every remaining entry.
        # NOTE(review): assumes Article.imgLink accepts None - confirm.
        images = soup.findAll("img")
        imgLink = images[0].get("src") if images else None

        article = Article(
            title=jinja2.Markup(entry.title).unescape(),
            description=jinja2.Markup(soup.getText()).unescape(),
            imgLink=imgLink,
            link=entry.links[0]["href"],
            srcName=srcName,
            labelName=labelName,
        )
        article.put()

    logging.info("Storing data from %s", srcName)
def parseAndStoreDoc(self):
    """Store one ``Article`` per entry of the parsed feed in ``self.doc``.

    Fields are derived from each entry as follows:

    * ``title``: ``entry.title`` run through ``jinja2.Markup(...).unescape()``.
    * ``link``: the ``href`` of the first item in ``entry.links``.
    * ``description``: the last ``<p>`` element of the HTML in
      ``entry.description``; when there is no ``<p>``, the whole parsed
      description is used.
    * ``imgLink``: always ``GQ.gqLogo``.
    * ``srcName``: ``self.srcName``.
    * ``labelName``: the second piece of ``self.srcName.split("_")``.

    Every article is saved with ``Article.put()``, and a single info message
    is logged after all entries have been handled.

    Raises:
        IndexError: if ``self.srcName`` contains no ``"_"`` or an entry has
            an empty ``links`` list.
    """
    # Loop-invariant values; srcName is also needed after the loop, so it
    # must exist even when the feed has no entries.
    srcName = self.srcName
    labelName = srcName.split("_")[1]

    for entry in self.doc.entries:
        soup = BeautifulSoup(entry.description)

        # Use the last paragraph when there is one, otherwise the entire
        # description markup.
        paragraphs = soup.findAll("p")
        if paragraphs:
            description = jinja2.Markup(paragraphs[-1]).unescape()
        else:
            description = jinja2.Markup(soup).unescape()

        article = Article(
            title=jinja2.Markup(entry.title).unescape(),
            description=description,
            # Reading the first <img> "src" from the description is disabled
            # here; the shared GQ logo is used for every article.
            imgLink=GQ.gqLogo,
            link=entry.links[0]["href"],
            srcName=srcName,
            labelName=labelName,
        )
        article.put()

    logging.info("Storing data from %s", srcName)