Ejemplo n.º 1
0
 def testHTTP410Gone(self):
     """Parsing a removed URL (HTTP 410) must raise ``tumblr.URLGoneError``."""
     try:
         tumblr.parse(self.urlGone)
     except tumblr.URLGoneError:
         # Expected outcome: nothing more to verify.
         return
     self.fail("Expected a URLGoneError!")
Ejemplo n.º 2
0
 def testHTTP500ServerError(self):
     """Parsing a URL that answers HTTP 500 must raise ``tumblr.InternalServerError``."""
     try:
         tumblr.parse(self.urlServerError)
     except tumblr.InternalServerError:
         # Expected outcome: nothing more to verify.
         return
     self.fail("Expected an InternalServerError!")
Ejemplo n.º 3
0
 def testHTTP403Forbidden(self):
     """Parsing a forbidden URL (HTTP 403) must raise ``tumblr.URLForbiddenError``."""
     try:
         tumblr.parse(self.urlForbidden)
     except tumblr.URLForbiddenError:
         # Expected outcome: nothing more to verify.
         return
     self.fail("Expected a URLForbiddenError!")
Ejemplo n.º 4
0
 def testHTTP404NotFound(self):
     """Parsing a missing URL (HTTP 404) must raise ``tumblr.URLNotFoundError``."""
     try:
         tumblr.parse(self.urlNotFound)
     except tumblr.URLNotFoundError:
         # Expected outcome: nothing more to verify.
         return
     self.fail("Expected a URLNotFoundError!")
Ejemplo n.º 5
0
 def testMalformedXML(self):
     """Parsing XML that is not well-formed must raise ``tumblr.TumblrParseError``."""
     try:
         tumblr.parse(self.urlMalformed)
     except tumblr.TumblrParseError:
         pass
     else:
         # Must go through self.fail(): a bare fail() is an undefined name
         # and would surface as a NameError instead of a test failure.
         self.fail("Expected a TumblrParseError for malformed XML!")
Ejemplo n.º 6
0
 def testUnencodedAmpersand(self):
     """Parsing XML with an unencoded ampersand must raise ``tumblr.TumblrParseError``."""
     try:
         tumblr.parse(self.urlBadAmpersand)
     except tumblr.TumblrParseError:
         pass
     else:
         # Must go through self.fail(): a bare fail() is an undefined name
         # and would surface as a NameError instead of a test failure.
         self.fail("Expected a TumblrParseError for malformed XML due to unencoded ampersand!")
Ejemplo n.º 7
0
 def testContentTypeApplicationXhtmlXml(self):
     """A feed served as application/xhtml+xml must raise ``tumblr.UnsupportedContentTypeError``."""
     try:
         tumblr.parse(self.urlContentTypeApplicationXhtmlXml)
     except tumblr.UnsupportedContentTypeError:
         # Expected outcome: nothing more to verify.
         return
     self.fail("Expected an UnsupportedContentTypeError!")
Ejemplo n.º 8
0
 def testHTTP503ServiceUnavailableError(self):
     """Parsing a URL that answers HTTP 503 must raise ``tumblr.ServiceUnavailableError``."""
     try:
         tumblr.parse(self.urlServiceUnavailable)
     except tumblr.ServiceUnavailableError:
         # Expected outcome: nothing more to verify.
         return
     self.fail("Expected a ServiceUnavailableError!")
Ejemplo n.º 9
0
 def testUnreachableHost(self):
     """Parsing a URL on an unreachable host must raise httplib2's ``ServerNotFoundError``."""
     from httplib2 import ServerNotFoundError
     try:
         tumblr.parse(self.urlUnreachable)
     except ServerNotFoundError:
         # Expected outcome: nothing more to verify.
         return
     self.fail("Expected a ServerNotFoundError!")
Ejemplo n.º 10
0
 def testString(self):
     """An XML string can be passed to the parser.

     Reads the file named by ``self.filename`` into a string, hands the
     string to ``tumblr.parse`` and checks the resulting title.
     """
     # The context manager closes the file even if read() raises.
     with open(self.filename, 'r') as f:
         xmlString = f.read()
     log = tumblr.parse(xmlString)
     assert log.title == 'golden hours'
Ejemplo n.º 11
0
Archivo: updater.py Proyecto: sdb/sdb
def update_tumblr(service):
  """Build ``Entry`` objects for new link posts of a Tumblr blog.

  Parses ``http://<site>.tumblr.com/api/read`` with ``tumblr.parse`` and
  creates one ``Entry`` for every post of type ``'link'`` whose ``date_gmt``
  is later than ``service.updated``. All other post types are not supported
  yet and are skipped.

  Args:
    service: object providing ``args['site']`` (used to build the feed URL)
      and ``updated`` (compared against each post's parsed date).

  Returns:
    A list of the ``Entry`` objects created, possibly empty.
  """
  import tumblr
  t = tumblr.parse('http://%s.tumblr.com/api/read' %service.args['site'], cache_dir=settings.TUMBLR_CACHE_DIR)
  entries = []
  for p in t.posts:
    pub_date = datetime.strptime(p.date_gmt, '%Y-%m-%d %H:%M:%S %Z')
    if pub_date <= service.updated:
      continue
    if p.type != 'link':
      # TODO: support 'quote' and the remaining post types.
      # Skipping explicitly keeps an unsupported post from hitting unbound
      # locals or reusing the data of a previously processed link post.
      continue
    update_type = 'Link'
    typ = 'link'
    data = {'title' : p.title,
      'url' : p.url,
      'desc' : p.description,
      'type' : p.type,
      'link_url' : p.link_url,
      'via' : p.via}
    entry = Entry(uuid=p.url, service=service, desc='Tumblr %s' %update_type, data=json.dumps(data), pub_date=pub_date, typ=typ)
    entries.append(entry)
  return entries
Ejemplo n.º 12
0
 def setUp(self):
     """Parse each sample tumblelog URL and keep the results in ``self.logs``."""
     self.urls = (
         "http://golden.cpl593h.net/api/read",
         "http://thelongesttrainieversaw.tumblr.com/api/read",
         "http://industry.tumblr.com/api/read",
         "http://marco.tumblr.com/api/read",
         "http://demo.tumblr.com/api/read"
     )
     # self.urls = ( "http://marco.tumblr.com/api/read", "http://www.marco.org/api/read" )
     self.logs = []
     collect = self.logs.append
     for feed_url in self.urls:
         collect(tumblr.parse(feed_url))
Ejemplo n.º 13
0
 def start(self):
     """Dump the body text of regular posts from a list of tumblelogs.

     Reads one tumblelog URL per line from ``/tumblr_urls.txt``, parses each
     with ``tumblr.parse`` and, for every post whose class is
     ``tumblr.Regular``, prints the text of its body and writes it to
     ``../data/tumblr_output.txt``. Posts of any other class are ignored.
     """
     # Context managers guarantee both files are closed even when parsing
     # or HTML extraction raises part-way through the URL list.
     with open("../data/tumblr_output.txt", "w") as out:
         with open("/tumblr_urls.txt", "r") as ins:
             for line in ins:
                 # Single-argument print(...) behaves the same as a Python 2
                 # print statement and as a Python 3 function call.
                 print(line)
                 data = tumblr.parse(line.strip())
                 for p in data.posts:
                     if p.__class__ is tumblr.Regular:
                         soup = BeautifulSoup(p.body)
                         print('Body : %s' % soup.text)
                         if soup.text is not None:
                             out.write('Body :' + soup.text.strip().encode('utf-8'))
                     # Other post classes (e.g. tumblr.Photo) are skipped.
Ejemplo n.º 14
0
 def testOpenFile(self):
     """An open file object can be passed to the parser.

     Opens the file named by ``self.filename``, hands the file object to
     ``tumblr.parse`` and checks the resulting title.
     """
     # The context manager closes the file even if tumblr.parse raises.
     with open(self.filename, 'r') as f:
         log = tumblr.parse(f)
     assert log.title == 'golden hours'
Ejemplo n.º 15
0
 def setUp(self):
     """Parse the ``sourcefeeds.xml`` sample tumblelog and store it as ``self.log``."""
     feed_url = "http://chompy.net/lab/tumblrapi/tests/tumblelog/sourcefeeds.xml"
     self.url = feed_url
     self.log = tumblr.parse(feed_url)
Ejemplo n.º 16
0
 def setUp(self):
     """Parse the ``regular.xml`` sample tumblelog and store it as ``self.log``."""
     feed_url = "http://chompy.net/lab/tumblrapi/tests/tumblelog/regular.xml"
     self.url = feed_url
     self.log = tumblr.parse(feed_url)
Ejemplo n.º 17
0
 def setUp(self):
     """Parse the ``demo.xml`` sample tumblelog and store it as ``self.log``."""
     feed_url = 'http://chompy.net/lab/tumblrapi/tests/tumblelog/demo.xml'
     self.url = feed_url
     self.log = tumblr.parse(feed_url)
Ejemplo n.º 18
0
 def testHTTP301MovedPermanently(self):
     """A 301 redirect is recorded on the parsed log's HTTP response."""
     parsed = tumblr.parse(self.urlMovedPermanently)
     # The previous response carries the redirect status ...
     assert parsed.http_response.previous.status == 301
     # ... and the final response points at the redirect destination.
     assert parsed.http_response['content-location'] == self.urlRedirectDestination
Ejemplo n.º 19
0
 def testHTTP307(self):
     """A 307 redirect is recorded on the parsed log's HTTP response."""
     parsed = tumblr.parse(self.urlTemporaryRedirect)
     # The previous response carries the redirect status ...
     assert parsed.http_response.previous.status == 307
     # ... and the final response points at the redirect destination.
     assert parsed.http_response['content-location'] == self.urlRedirectDestination
Ejemplo n.º 20
0
 def parse(self):
     """Fetches Tumblr API data and converts supported posts into entries.

     Stores the result of ``spider.fetch(self.api_url)`` in
     ``self.http_response`` / ``self.http_content``, parses the feed at
     ``self.api_url`` with ``tumblr.parse`` and appends one entry object to
     ``self.entries`` for every regular, link, quote or photo post.

     Link posts are skipped when ``'link'`` is in ``self.excluded_types``.
     Conversation, video and audio posts are logged and skipped, as is any
     post type this method does not recognise. A post whose processing
     raises ``AttributeError`` is logged and skipped.
     """
     self.logger.info("Fetching API data at '%s'" % self.api_url)
     self.http_response, self.http_content = spider.fetch(self.api_url)
     self.logger.info("Parsing API data for entries...")
     t = tumblr.parse(self.api_url)
     for post in t.posts:
         try:
             if post.type == 'regular':
                 self.logger.info("Tumblr post type: regular")
                 e = Post()
                 e.title = post.title
                 e.summary = post.content
                 e.content = post.content
             elif post.type == 'link':
                 if 'link' in self.excluded_types:
                     self.logger.debug("Skipping Tumblr link")
                     continue
                 else:
                     self.logger.info("Tumblr post type: link")
                     e = Link()
                     e.title = post.title
                     e.summary = post.content
                     e.content = post.content
                     e.url = post.related
                     e.comments = post.url
             elif post.type == 'quote':
                 self.logger.info("Tumblr post type: quote")
                 e = Quote()
                 e.summary = post.content
                 # Chop the smart quotes that Tumblr automatically
                 # adds to a quote
                 e.summary = e.summary.lstrip("“").rstrip("”")
                 e.content = e.summary
                 # Get the quote's citation, and, if possible its source
                 e.citation = post.source
                 try:
                     soup = BeautifulSoup(e.citation)
                     e.citation_url = soup.find('a').get('href')
                     e.via = e.citation_url
                 except AttributeError:
                     # soup.find('a') returned None: the citation has no link.
                     e.citation_url = None
             elif post.type == 'photo':
                 self.logger.info("Tumblr post type: photo")
                 e = Photo()
                 e.photo_type = 'tumblr'
                 e.title = ''
                 e.summary = post.caption
                 #e.content = e.summary
                 # post.urls is a dictionary of photo URLs keyed by size.
                 # Let's get the big one.
                 e.photo_url = post.urls['500']
                 e.cached_url = config.IMAGES_URL + '/' + e._get_cached_original_shortname()
                 self.logger.debug("Tumblr photo URL: '%s'" % e.photo_url)
                 e.cache()
                 e.set_dimensions()
                 e.set_content()
             # Conversation, Video, and Audio post types aren't
             # going to be implemented for a while
             elif post.type == 'conversation':
                 # TODO: Support Tumblr conversations
                 self.logger.info("Tumblr post type: conversation")
                 continue
             elif post.type == 'video':
                 # TODO: Support Tumblr videos
                 self.logger.info("Tumblr post type: video")
                 continue
             elif post.type == 'audio':
                 # TODO: Support Tumblr audio
                 self.logger.info("Tumblr post type: audio")
                 continue
             else:
                 # Without this guard an unrecognised type would either hit
                 # an unbound ``e`` (first post) or mutate and re-append the
                 # entry built for the previous post.
                 self.logger.info("Skipping unsupported Tumblr post type: '%s'" % post.type)
                 continue
             # Fields shared by every supported entry type.
             e.source.name = self.name
             e.source.url = self.url
             if e.url == '':
                 e.url = post.url
             e.author = self.owner
             e.date = post.date
             e.date_parsed = parse_date(post.date)
             self.logger.debug("Tumblr post date: %s" % e.date_as_string(e.date_parsed))
             self.logger.info("Entry title: '%s'" % e.title)
             self.logger.debug("Entry URL: '%s'" % e.url)
             self.entries.append(e)
         except AttributeError as err:
             # FIXME: it is unclear which attribute access this handler was
             # meant to guard. The post is still skipped (best effort), but
             # the error is now logged instead of silently discarded.
             self.logger.info("Skipping Tumblr post after AttributeError: %s" % err)
Ejemplo n.º 21
0
 def testContentTypeApplicationXml(self):
     """A feed served with content-type application/xml is accepted by the parser."""
     tumblelog = tumblr.parse(self.urlContentTypeApplicationXml)
     # Any attribute check will do; reaching this line means parsing worked.
     assert tumblelog.name == u'demo'