예제 #1
0
 def process_as_rss(self, document):
     """
     Walk an RSS document and store a note for every article it links to.

     Stores ``document`` on the instance, calls ``self.parse()`` and then
     visits every entry returned by ``self.items()``.  Entries without a
     ``dc:date`` tag are skipped; otherwise the text of the last such tag is
     converted with ``self.get_datetime``.  The node following each ``link``
     tag is used as the link.  Empty links, links ``note.exists`` already
     reports, and links whose fetch raises ``EmptyDOM`` are skipped.  For
     the rest, the joined h1/h2/h3/anchor/paragraph texts are handed to
     ``note.get_or_create`` and any reported errors are written to stderr.

     :param document: The RSS source to process.

     :rtype None:
     """
     self.document = document
     self.parse()
     heading_sep = ".  "
     for entry in self.items():
         date_tags = entry.findAll('dc:date')
         if not date_tags:
             continue
         newest_date = date_tags.pop()
         published_at = self.get_datetime(newest_date.get_text())

         # NOTE(review): the link is read from the node *after* each <link>
         # tag, presumably because the parser leaves the URL text outside
         # the tag -- confirm against the parser in use.
         targets = []
         for tag in entry.findAll('link'):
             targets.append(tag.next_sibling)

         for target in targets:
             if not target:
                 continue
             if note.exists(target):
                 continue
             try:
                 self.fetch_and_clean_dom(target)
             except EmptyDOM:  # Server returned an empty response.
                 continue

             # One text blob per tag family, in the order the note expects.
             h1_text = heading_sep.join(self.h1s())
             h2_text = heading_sep.join(self.h2s())
             h3_text = heading_sep.join(self.h3s())
             anchor_text = heading_sep.join(anchor[0] for anchor in self.as_())
             paragraph_text = " ".join(self.ps())

             _, errors = note.get_or_create(
                 target,
                 h1_text,
                 h2_text,
                 h3_text,
                 anchor_text,
                 paragraph_text,
                 published_at,
             )
             if errors:
                 sys.stderr.write(str(errors) + "\n")
예제 #2
0
파일: __init__.py 프로젝트: JoeHill/julian
    def get_note(self):
        """
        Fetch or create a note from fixed sample data.

        Calls ``note.get_or_create`` with a hard-coded identifier and sample
        priority texts, using ``now()`` for ``published_at``.  The errors
        returned alongside the result are discarded.

        :returns: the ``(n, created)`` pair unpacked from the first element
            of the ``note.get_or_create`` result.
        """
        outcome, _errors = note.get_or_create(
            identifier='http://www.google.com/',
            prioritya="The quick brown fox jumped over the lazy dog.",
            priorityb="Lorem ipsum dolor sit.",
            priorityc="The knights who say ni.",
            priorityd="",
            prioritye="",
            published_at=now(),
        )
        instance, was_created = outcome
        return instance, was_created
예제 #3
0
파일: internals.py 프로젝트: JoeHill/julian
    def process_as_rss(self, document):
        """
        Treat ``document`` as an RSS feed and save a note per linked article.

        The document is stored on the instance and parsed, then every entry
        from ``self.items()`` is visited.  An entry needs at least one
        ``pubdate`` tag (the text of the last one is converted with
        ``self.get_datetime``); entries without one are ignored.  The node
        following each ``link`` tag is taken as the article link.  A link is
        skipped when it is empty, when ``note.exists`` reports it, or when
        fetching it raises ``EmptyDOM``.  Otherwise the h1, h2, h3, anchor
        and paragraph texts of the fetched page are joined and passed, in
        that order, to ``note.get_or_create``; reported errors go to stderr.

        :param unicode document: The RSS source code

        :rtype None:
        """
        self.document = document
        self.parse()

        for feed_item in self.items():
            date_tags = feed_item.findAll("pubdate")
            if not date_tags:
                continue
            last_date_tag = date_tags.pop()
            published_at = self.get_datetime(last_date_tag.get_text())

            # NOTE(review): the URL is taken from the node that follows each
            # <link> tag rather than from the tag itself -- presumably a
            # parser quirk; confirm before changing.
            link_tags = feed_item.findAll("link")
            article_links = [tag.next_sibling for tag in link_tags]

            for article_link in article_links:
                if not article_link:
                    continue
                if note.exists(article_link):
                    continue

                try:
                    self.fetch_and_clean_dom(article_link)
                except EmptyDOM:  # Server returned an empty response.
                    continue

                # Headings and anchors are joined as sentences, paragraphs
                # with a single space.
                sentence_sep = ".  "
                from_h1 = sentence_sep.join(self.h1s())
                from_h2 = sentence_sep.join(self.h2s())
                from_h3 = sentence_sep.join(self.h3s())
                from_anchors = sentence_sep.join(entry[0] for entry in self.as_())
                from_paragraphs = " ".join(self.ps())

                _, errors = note.get_or_create(
                    article_link,
                    from_h1,
                    from_h2,
                    from_h3,
                    from_anchors,
                    from_paragraphs,
                    published_at,
                )
                if errors:
                    message = str(errors) + "\n"
                    sys.stderr.write(message)