Example #1
0
    def parseAndStoreDoc(self):
        """Build an Article from every entry in self.doc.entries and put() it.

        For each entry the title and description are unescaped through
        jinja2.Markup, the description is reduced to its text content via
        BeautifulSoup and capped at 500 characters, and the link is taken
        from the entry's first ``links`` item.  The image link is the URL of
        the first ``media_content`` item, falling back to NewYorkTimes.logo
        when the entry carries no usable one.  The label is the second
        "_"-separated part of self.srcName.

        Raises:
            IndexError: if self.srcName contains no "_".
        """
        # Loop-invariant values, resolved up front so the final log line also
        # works when the feed has no entries (the loop body never runs then).
        srcName = self.srcName
        labelName = srcName.split("_")[1]

        for entry in self.doc.entries:
            title = jinja2.Markup(entry.title).unescape()
            soup = BeautifulSoup(entry.description)
            # Slicing is a no-op for descriptions of 500 characters or fewer.
            description = jinja2.Markup(soup.getText()).unescape()[:500]
            link = entry.links[0]["href"]

            # If the entry has no image, use the source logo instead.  A
            # missing attribute, an empty media list and a media item without
            # "url" all mean "no image"; the exception is deliberately not
            # bound so it cannot clobber the loop variable.
            try:
                imgLink = entry.media_content[0]["url"]
            except (AttributeError, IndexError, KeyError):
                imgLink = NewYorkTimes.logo

            article = Article(title=title,
                              description=description,
                              imgLink=imgLink,
                              link=link,
                              srcName=srcName,
                              labelName=labelName)
            article.put()
        logging.info("Storing data to database!(from %s)", srcName)
Example #2
0
 def parseAndStoreDoc(self):
     """Build an Article from every entry in self.doc.entries and put() it.

     For each entry the title and description are unescaped through
     jinja2.Markup, the description is reduced to its text content via
     BeautifulSoup, and the link and image link are read from the entry's
     ``link`` and ``imgurl`` attributes.  The label is the second
     "_"-separated part of self.srcName.

     Raises:
         IndexError: if self.srcName contains no "_".
     """
     # Loop-invariant values, resolved up front so the final log line also
     # works when the feed has no entries (the loop body never runs then).
     srcName = self.srcName
     labelName = srcName.split("_")[1]

     for entry in self.doc.entries:
         title = jinja2.Markup(entry.title).unescape()
         link = entry.link
         imgLink = entry.imgurl
         soup = BeautifulSoup(entry.description)
         description = jinja2.Markup(soup.getText()).unescape()
         article = Article(title=title,
                           description=description,
                           imgLink=imgLink,
                           link=link,
                           srcName=srcName,
                           labelName=labelName)
         article.put()
     logging.info("Storing data from %s", srcName)
Example #3
0
 def parseAndStoreDoc(self):
     """Build an Article from every entry in self.doc.entries and put() it.

     For each entry the title and description are unescaped through
     jinja2.Markup, the description is reduced to its text content via
     BeautifulSoup, and the link is taken from the entry's first ``links``
     item.  The image link is the URL of the first ``media_thumbnail``
     item, falling back to BBC.bbcLogo when the entry carries no usable
     one.  The label is the second "_"-separated part of self.srcName.

     Raises:
         IndexError: if self.srcName contains no "_".
     """
     # Loop-invariant values, resolved up front so the final log line also
     # works when the feed has no entries (the loop body never runs then).
     srcName = self.srcName
     labelName = srcName.split("_")[1]

     for entry in self.doc.entries:
         title = jinja2.Markup(entry.title).unescape()
         link = entry.links[0]["href"]
         soup = BeautifulSoup(entry.description)
         description = jinja2.Markup(soup.getText()).unescape()

         # Retrieve the image link; if it does not exist use the logo
         # instead.  A missing attribute, an empty thumbnail list and a
         # thumbnail without "url" all mean "no image"; the exception is
         # deliberately not bound so it cannot clobber the loop variable.
         try:
             imgLink = entry.media_thumbnail[0]["url"]
         except (AttributeError, IndexError, KeyError):
             imgLink = BBC.bbcLogo

         article = Article(title=title,
                           description=description,
                           imgLink=imgLink,
                           link=link,
                           srcName=srcName,
                           labelName=labelName)
         article.put()
     logging.info("Storing data from %s", srcName)
Example #4
0
 def parseAndStoreDoc(self):
     """Turn each entry of self.doc.entries into an Article and put() it.

     Title and description are unescaped through jinja2.Markup; the
     description is first reduced to its text content via BeautifulSoup.
     The link and image link come straight from the entry's ``link`` and
     ``imgurl`` attributes, and the label is the second "_"-separated part
     of self.srcName.

     Raises:
         IndexError: if self.srcName contains no "_".
     """
     # Computed once before the loop: both values are the same for every
     # entry, and the closing log call needs srcName even for an empty feed.
     srcName = self.srcName
     labelName = srcName.split("_")[1]

     for entry in self.doc.entries:
         soup = BeautifulSoup(entry.description)
         article = Article(
             title=jinja2.Markup(entry.title).unescape(),
             description=jinja2.Markup(soup.getText()).unescape(),
             imgLink=entry.imgurl,
             link=entry.link,
             srcName=srcName,
             labelName=labelName)
         article.put()
     logging.info("Storing data from %s", srcName)
Example #5
0
    def parseAndStoreDoc(self):
        """Turn each entry of self.doc.entries into an Article and put() it.

        Title and description are unescaped through jinja2.Markup; the
        description is first reduced to its text content via BeautifulSoup
        and then limited to 500 characters.  The link is the ``href`` of the
        entry's first ``links`` item.  The image link is the URL of the first
        ``media_content`` item, or NewYorkTimes.logo when the entry has no
        usable one.  The label is the second "_"-separated part of
        self.srcName.

        Raises:
            IndexError: if self.srcName contains no "_".
        """
        # Computed once before the loop: both values are the same for every
        # entry, and the closing log call needs srcName even for an empty
        # feed.
        srcName = self.srcName
        labelName = srcName.split("_")[1]

        for entry in self.doc.entries:
            title = jinja2.Markup(entry.title).unescape()
            soup = BeautifulSoup(entry.description)
            description = jinja2.Markup(soup.getText()).unescape()
            # Truncate the description to at most 500 characters.
            if len(description) > 500:
                description = description[:500]

            link = entry.links[0]["href"]

            # If there is no image in the entry, use the source logo instead.
            # Besides a missing attribute, an empty media list or a media
            # item lacking "url" also counts as "no image".  The exception is
            # left unbound so the loop variable is never overwritten.
            try:
                imgLink = entry.media_content[0]["url"]
            except (AttributeError, IndexError, KeyError):
                imgLink = NewYorkTimes.logo

            article = Article(title=title,
                              description=description,
                              imgLink=imgLink,
                              link=link,
                              srcName=srcName,
                              labelName=labelName)
            article.put()
        logging.info("Storing data to database!(from %s)", srcName)
Example #6
0
 def parseAndStoreDoc(self):
     """Turn each entry of self.doc.entries into an Article and put() it.

     Title and description are unescaped through jinja2.Markup; the
     description is first reduced to its text content via BeautifulSoup.
     The link is the ``href`` of the entry's first ``links`` item.  The
     image link is the URL of the first ``media_thumbnail`` item, or
     BBC.bbcLogo when the entry has no usable one.  The label is the second
     "_"-separated part of self.srcName.

     Raises:
         IndexError: if self.srcName contains no "_".
     """
     # Computed once before the loop: both values are the same for every
     # entry, and the closing log call needs srcName even for an empty feed.
     srcName = self.srcName
     labelName = srcName.split("_")[1]

     for entry in self.doc.entries:
         title = jinja2.Markup(entry.title).unescape()
         link = entry.links[0]["href"]
         soup = BeautifulSoup(entry.description)
         description = jinja2.Markup(soup.getText()).unescape()

         # Retrieve the image link; if it does not exist use the logo
         # instead.  Besides a missing attribute, an empty thumbnail list or
         # a thumbnail lacking "url" also counts as "no image".  The
         # exception is left unbound so the loop variable is never
         # overwritten.
         try:
             imgLink = entry.media_thumbnail[0]["url"]
         except (AttributeError, IndexError, KeyError):
             imgLink = BBC.bbcLogo

         article = Article(title=title,
                           description=description,
                           imgLink=imgLink,
                           link=link,
                           srcName=srcName,
                           labelName=labelName)
         article.put()
     logging.info("Storing data from %s", srcName)