def parseAndStoreDoc(self):
    """Store every entry of ``self.doc`` as an ``Article``.

    For each entry in ``self.doc.entries``:

    * the title is HTML-unescaped;
    * the description is passed through BeautifulSoup, reduced to its
      text, HTML-unescaped and capped at 500 characters;
    * the ``href`` of the first link becomes the article link;
    * the ``url`` of the first ``media_content`` item becomes the image
      link, falling back to ``NewYorkTimes.logo`` when unavailable.

    The label name is the second ``_``-separated part of ``self.srcName``,
    so an ``IndexError`` is raised if ``self.srcName`` has no underscore.
    """
    for entry in self.doc.entries:
        title = jinja2.Markup(entry.title).unescape()
        soup = BeautifulSoup(entry.description)
        # Slicing already leaves short text untouched, so no length check
        # is needed to cap the description at 500 characters.
        description = jinja2.Markup(soup.getText()).unescape()[:500]
        link = entry.links[0]["href"]
        srcName = self.srcName
        labelName = srcName.split("_")[1]

        # If the entry has no image, use the source logo instead.  The
        # lookup can fail because the attribute is missing, the list is
        # empty, or the first item has no "url" key.  The exception is
        # deliberately not bound to a name so the loop variable is never
        # clobbered.
        try:
            imgLink = entry.media_content[0]["url"]
        except (AttributeError, IndexError, KeyError):
            imgLink = NewYorkTimes.logo

        article = Article(title=title, description=description,
                          imgLink=imgLink, link=link,
                          srcName=srcName, labelName=labelName)
        article.put()
        # NOTE(review): the original layout was collapsed onto one line;
        # this log call is assumed to run once per stored entry - confirm.
        logging.info("Storing data to database!(from %s)" % srcName)
def parseAndStoreDoc(self):
    """Persist each entry of ``self.doc.entries`` as an ``Article``.

    The title and the text of the description are HTML-unescaped; the
    link and image link are read from the entry's ``link`` and ``imgurl``
    attributes.  The label name is the second ``_``-separated part of
    ``self.srcName``.
    """
    for entry in self.doc.entries:
        headline = jinja2.Markup(entry.title).unescape()
        url = entry.link
        image_url = entry.imgurl

        # Reduce the description to its text before unescaping it.
        parsed = BeautifulSoup(entry.description)
        plain_text = parsed.getText()
        summary = jinja2.Markup(plain_text).unescape()

        source = self.srcName
        label = self.srcName.split("_")[1]

        record = Article(
            title=headline,
            description=summary,
            imgLink=image_url,
            link=url,
            srcName=source,
            labelName=label,
        )
        record.put()
        # NOTE(review): the original layout was collapsed onto one line;
        # this log call is assumed to run once per stored entry - confirm.
        logging.info("Storing data from %s" % source)
def parseAndStoreDoc(self):
    """Store every entry of ``self.doc`` as an ``Article``.

    For each entry in ``self.doc.entries``:

    * the title is HTML-unescaped;
    * the ``href`` of the first link becomes the article link;
    * the description is passed through BeautifulSoup, reduced to its
      text and HTML-unescaped;
    * the ``url`` of the first ``media_thumbnail`` item becomes the image
      link, falling back to ``BBC.bbcLogo`` when unavailable.

    The label name is the second ``_``-separated part of ``self.srcName``,
    so an ``IndexError`` is raised if ``self.srcName`` has no underscore.
    """
    for entry in self.doc.entries:
        title = jinja2.Markup(entry.title).unescape()
        link = entry.links[0]["href"]
        soup = BeautifulSoup(entry.description)
        description = jinja2.Markup(soup.getText()).unescape()

        # Retrieve the image link; if it does not exist, use the logo
        # instead.  The lookup can fail because the attribute is missing,
        # the list is empty, or the first item has no "url" key.  The
        # exception is deliberately not bound to a name so the loop
        # variable is never clobbered.
        try:
            imgLink = entry.media_thumbnail[0]["url"]
        except (AttributeError, IndexError, KeyError):
            imgLink = BBC.bbcLogo

        srcName = self.srcName
        labelName = srcName.split("_")[1]
        article = Article(title=title, description=description,
                          imgLink=imgLink, link=link,
                          srcName=srcName, labelName=labelName)
        article.put()
        # NOTE(review): the original layout was collapsed onto one line;
        # this log call is assumed to run once per stored entry - confirm.
        logging.info("Storing data from %s" % srcName)
def parseAndStoreDoc(self):
    """Save one ``Article`` per entry found in ``self.doc.entries``.

    Each article gets the entry's unescaped title, the unescaped text of
    its description, and its ``link`` and ``imgurl`` attributes, together
    with ``self.srcName`` and the label derived from it (the second
    ``_``-separated part).
    """
    for item in self.doc.entries:
        # Collect the constructor arguments in the same order the values
        # are read from the entry.
        fields = {}
        fields["title"] = jinja2.Markup(item.title).unescape()
        fields["link"] = item.link
        fields["imgLink"] = item.imgurl

        markup = BeautifulSoup(item.description)
        fields["description"] = jinja2.Markup(markup.getText()).unescape()

        origin = self.srcName
        fields["srcName"] = origin
        fields["labelName"] = self.srcName.split("_")[1]

        Article(**fields).put()
        # NOTE(review): the original layout was collapsed onto one line;
        # this log call is assumed to run once per stored entry - confirm.
        logging.info("Storing data from %s" % origin)
def parseAndStoreDoc(self):
    """Store every entry of ``self.doc`` as an ``Article``.

    Per entry in ``self.doc.entries`` this builds an article from:

    * the HTML-unescaped title;
    * the HTML-unescaped text of the description (extracted with
      BeautifulSoup), limited to its first 500 characters;
    * the ``href`` of the entry's first link;
    * the ``url`` of the entry's first ``media_content`` item, or
      ``NewYorkTimes.logo`` when that cannot be read.

    ``labelName`` is the second ``_``-separated part of ``self.srcName``;
    an ``IndexError`` is raised if ``self.srcName`` has no underscore.
    """
    for entry in self.doc.entries:
        title = jinja2.Markup(entry.title).unescape()
        soup = BeautifulSoup(entry.description)
        description = jinja2.Markup(soup.getText()).unescape()
        # Truncate the description to 500 characters; slicing is a no-op
        # when the text is already short enough.
        description = description[:500]
        link = entry.links[0]["href"]
        srcName = self.srcName
        labelName = srcName.split("_")[1]

        # If there is no image in the entry, use the source logo instead.
        # A missing attribute, an empty list and an item without a "url"
        # key all count as "no image".  The exception is not bound to a
        # name, so the loop variable holding the entry stays intact.
        try:
            imgLink = entry.media_content[0]["url"]
        except (AttributeError, IndexError, KeyError):
            imgLink = NewYorkTimes.logo

        article = Article(title=title, description=description,
                          imgLink=imgLink, link=link,
                          srcName=srcName, labelName=labelName)
        article.put()
        # NOTE(review): the original layout was collapsed onto one line;
        # this log call is assumed to run once per stored entry - confirm.
        logging.info("Storing data to database!(from %s)" % srcName)
def parseAndStoreDoc(self):
    """Store every entry of ``self.doc`` as an ``Article``.

    Per entry in ``self.doc.entries`` this builds an article from:

    * the HTML-unescaped title;
    * the ``href`` of the entry's first link;
    * the HTML-unescaped text of the description (extracted with
      BeautifulSoup);
    * the ``url`` of the entry's first ``media_thumbnail`` item, or
      ``BBC.bbcLogo`` when that cannot be read.

    ``labelName`` is the second ``_``-separated part of ``self.srcName``;
    an ``IndexError`` is raised if ``self.srcName`` has no underscore.
    """
    for entry in self.doc.entries:
        title = jinja2.Markup(entry.title).unescape()
        link = entry.links[0]["href"]
        soup = BeautifulSoup(entry.description)
        description = jinja2.Markup(soup.getText()).unescape()

        # Retrieve the image link; if it does not exist, use the logo
        # instead.  A missing attribute, an empty list and an item without
        # a "url" key all count as "no image".  The exception is not bound
        # to a name, so the loop variable holding the entry stays intact.
        try:
            imgLink = entry.media_thumbnail[0]["url"]
        except (AttributeError, IndexError, KeyError):
            imgLink = BBC.bbcLogo

        srcName = self.srcName
        labelName = srcName.split("_")[1]
        article = Article(title=title, description=description,
                          imgLink=imgLink, link=link,
                          srcName=srcName, labelName=labelName)
        article.put()
        # NOTE(review): the original layout was collapsed onto one line;
        # this log call is assumed to run once per stored entry - confirm.
        logging.info("Storing data from %s" % srcName)