def testHTTP410Gone(self):
    """Error thrown if URL has been removed (HTTP 410)."""
    try:
        tumblr.parse(self.urlGone)
    except tumblr.URLGoneError:
        return  # expected outcome
    self.fail("Expected a URLGoneError!")
def testHTTP500ServerError(self):
    """Error thrown if HTTP 500 occurs."""
    try:
        tumblr.parse(self.urlServerError)
    except tumblr.InternalServerError:
        return  # expected outcome
    self.fail("Expected an InternalServerError!")
def testHTTP403Forbidden(self):
    """Error thrown if URL is forbidden (HTTP 403)."""
    try:
        tumblr.parse(self.urlForbidden)
    except tumblr.URLForbiddenError:
        return  # expected outcome
    self.fail("Expected a URLForbiddenError!")
def testHTTP404NotFound(self):
    """Error thrown if URL cannot be found (HTTP 404)."""
    try:
        tumblr.parse(self.urlNotFound)
    except tumblr.URLNotFoundError:
        return  # expected outcome
    self.fail("Expected a URLNotFoundError!")
def testMalformedXML(self):
    """Error thrown if XML is not well-formed."""
    try:
        tumblr.parse(self.urlMalformed)
    except tumblr.TumblrParseError:
        pass
    else:
        # Bug fix: `fail` was called as a bare name, which raises
        # NameError instead of reporting a test failure; it is a
        # unittest.TestCase method and must be called on self.
        self.fail("Expected a TumblrParseError for malformed XML!")
def testUnencodedAmpersand(self):
    """Error thrown if ampersand is unencoded."""
    try:
        tumblr.parse(self.urlBadAmpersand)
    except tumblr.TumblrParseError:
        pass
    else:
        # Bug fix: `fail` was called as a bare name, which raises
        # NameError instead of reporting a test failure; it is a
        # unittest.TestCase method and must be called on self.
        self.fail("Expected a TumblrParseError for malformed XML due to unencoded ampersand!")
def testContentTypeApplicationXhtmlXml(self):
    """Error thrown if content-type application/xhtml+xml used."""
    try:
        tumblr.parse(self.urlContentTypeApplicationXhtmlXml)
    except tumblr.UnsupportedContentTypeError:
        return  # expected outcome
    self.fail("Expected an UnsupportedContentTypeError!")
def testHTTP503ServiceUnavailableError(self):
    """Error thrown if HTTP 503 occurs."""
    try:
        tumblr.parse(self.urlServiceUnavailable)
    except tumblr.ServiceUnavailableError:
        return  # expected outcome
    self.fail("Expected a ServiceUnavailableError!")
def testUnreachableHost(self):
    """Error thrown if host cannot be reached."""
    from httplib2 import ServerNotFoundError
    try:
        tumblr.parse(self.urlUnreachable)
    except ServerNotFoundError:
        return  # expected outcome
    self.fail("Expected a ServerNotFoundError!")
def testString(self):
    """An XML string can be passed to the parser."""
    # Context manager guarantees the handle is closed even if read()
    # raises (the original leaked the file on error).
    with open(self.filename, 'r') as f:
        xmlString = f.read()
    log = tumblr.parse(xmlString)
    assert log.title == 'golden hours'
def update_tumblr(service):
    """Return new Entry objects for the service's Tumblr posts published
    since ``service.updated``.

    Only 'link' posts are materialized for now; quote and other post
    types are recognized but skipped (TODO).
    """
    import tumblr
    t = tumblr.parse('http://%s.tumblr.com/api/read' % service.args['site'],
                     cache_dir=settings.TUMBLR_CACHE_DIR)
    entries = []
    for p in t.posts:
        pub_date = datetime.strptime(p.date_gmt, '%Y-%m-%d %H:%M:%S %Z')
        if pub_date <= service.updated:
            continue
        # Bug fix: these were never initialized, so the first non-link
        # post raised NameError at `if typ:`, and on later iterations
        # stale values from a previous link post leaked into quote/other
        # posts, creating bogus duplicate entries. Reset per post.
        typ = None
        update_type = None
        data = None
        if p.type == 'link':
            update_type = 'Link'
            data = {'title': p.title,
                    'url': p.url,
                    'desc': p.description,
                    'type': p.type,
                    'link_url': p.link_url,
                    'via': p.via}
            typ = 'link'
        elif p.type == 'quote':
            pass  # TODO: support quote posts
        else:
            pass  # TODO: support remaining post types
        if typ:
            entry = Entry(uuid=p.url, service=service,
                          desc='Tumblr %s' % update_type,
                          data=json.dumps(data),
                          pub_date=pub_date, typ=typ)
            entries.append(entry)
    return entries
def setUp(self):
    """Fetch and parse every test tumblelog before the assertions run."""
    self.urls = (
        "http://golden.cpl593h.net/api/read",
        "http://thelongesttrainieversaw.tumblr.com/api/read",
        "http://industry.tumblr.com/api/read",
        "http://marco.tumblr.com/api/read",
        "http://demo.tumblr.com/api/read",
    )
    self.logs = [tumblr.parse(url) for url in self.urls]
def start(self): out = open("../data/tumblr_output.txt", "w") ins = open( "/tumblr_urls.txt", "r" ) for line in ins: print line data = tumblr.parse(line.strip()) for p in data.posts: if p.__class__ is tumblr.Regular: #pprint(p soup = BeautifulSoup(p.body) print 'Body :',soup.text if soup.text is not None: out.write('Body :' + soup.text.strip().encode('utf-8')) #pass elif p.__class__ is tumblr.Photo: pass # print p.urls ins.close() out.close()
def testOpenFile(self):
    """An open file object can be passed to the parser."""
    # Context manager closes the file even if the parser raises
    # (the original only closed it on the success path).
    with open(self.filename, 'r') as f:
        log = tumblr.parse(f)
    assert log.title == 'golden hours'
def setUp(self):
    """Parse the sourcefeeds test fixture before each test."""
    fixture = "http://chompy.net/lab/tumblrapi/tests/tumblelog/sourcefeeds.xml"
    self.url = fixture
    self.log = tumblr.parse(fixture)
def setUp(self):
    """Parse the regular-post test fixture before each test."""
    fixture = "http://chompy.net/lab/tumblrapi/tests/tumblelog/regular.xml"
    self.url = fixture
    self.log = tumblr.parse(fixture)
def setUp(self):
    """Parse the demo tumblelog test fixture before each test."""
    fixture = 'http://chompy.net/lab/tumblrapi/tests/tumblelog/demo.xml'
    self.url = fixture
    self.log = tumblr.parse(fixture)
def testHTTP301MovedPermanently(self):
    """Redirect via HTTP 301 is recorded."""
    log = tumblr.parse(self.urlMovedPermanently)
    was_redirected = log.http_response.previous.status == 301
    reached_target = log.http_response['content-location'] == self.urlRedirectDestination
    assert was_redirected and reached_target
def testHTTP307(self):
    """Redirect via HTTP 307 is recorded."""
    log = tumblr.parse(self.urlTemporaryRedirect)
    was_redirected = log.http_response.previous.status == 307
    reached_target = log.http_response['content-location'] == self.urlRedirectDestination
    assert was_redirected and reached_target
def parse(self):
    """Fetches Tumblr API data and parses it.

    Fetches ``self.api_url`` via ``spider.fetch`` (recording the raw HTTP
    response/content on the instance) and again via ``tumblr.parse`` for
    structured posts. Each post is mapped to a local entry object
    (Post/Link/Quote/Photo) and appended to ``self.entries``;
    conversation, video and audio posts are skipped for now.
    """
    self.logger.info("Fetching API data at '%s'" % self.api_url)
    # Keep the raw HTTP exchange around for callers/debugging.
    self.http_response, self.http_content = spider.fetch(self.api_url)
    self.logger.info("Parsing API data for entries...")
    t = tumblr.parse(self.api_url)
    for post in t.posts:
        try:
            if post.type == 'regular':
                self.logger.info("Tumblr post type: regular")
                e = Post()
                e.title = post.title
                e.summary = post.content
                e.content = post.content
            elif post.type == 'link':
                # Links can be excluded wholesale via configuration.
                if 'link' in self.excluded_types:
                    self.logger.debug("Skipping Tumblr link")
                    continue
                else:
                    self.logger.info("Tumblr post type: link")
                    e = Link()
                    e.title = post.title
                    e.summary = post.content
                    e.content = post.content
                    e.url = post.related
                    e.comments = post.url
            elif post.type == 'quote':
                self.logger.info("Tumblr post type: quote")
                e = Quote()
                e.summary = post.content
                # Chop the smart quotes that Tumblr automatically
                # adds to a quote.
                e.summary = e.summary.lstrip("“").rstrip("”")
                e.content = e.summary
                # Get the quote's citation, and, if possible, its source.
                e.citation = post.source
                try:
                    soup = BeautifulSoup(e.citation)
                    e.citation_url = soup.find('a').get('href')
                    e.via = e.citation_url
                except AttributeError:
                    # soup.find('a') returned None: the citation has no link.
                    e.citation_url = None
            elif post.type == 'photo':
                self.logger.info("Tumblr post type: photo")
                e = Photo()
                e.photo_type = 'tumblr'
                e.title = ''
                e.summary = post.caption
                #e.content = e.summary
                # post.urls is a dictionary of photo URLs keyed by size.
                # Let's get the big one.
                e.photo_url = post.urls['500']
                e.cached_url = config.IMAGES_URL + '/' + e._get_cached_original_shortname()
                self.logger.debug("Tumblr photo URL: '%s'" % e.photo_url)
                # Download and post-process the image (side effects on e).
                e.cache()
                e.set_dimensions()
                e.set_content()
            # Conversation, Video, and Audio post types aren't
            # going to be implemented for a while.
            elif post.type == 'conversation':
                # TODO: Support Tumblr conversations
                self.logger.info("Tumblr post type: conversation")
                continue
                #e = Conversation()
            elif post.type == 'video':
                # TODO: Support Tumblr videos
                self.logger.info("Tumblr post type: video")
                continue
                #e = Video()
            elif post.type == 'audio':
                # TODO: Support Tumblr audio
                self.logger.info("Tumblr post type: audio")
                continue
                #e = Audio()
            # Common metadata shared by every materialized entry type.
            e.source.name = self.name
            e.source.url = self.url
            if e.url == '':
                e.url = post.url
            e.author = self.owner
            e.date = post.date
            e.date_parsed = parse_date(post.date)
            self.logger.debug("Tumblr post date: %s" % e.date_as_string(e.date_parsed))
            self.logger.info("Entry title: '%s'" % e.title)
            self.logger.debug("Entry URL: '%s'" % e.url)
            self.entries.append(e)
        except AttributeError:
            # FIXME: Why is this exception handler here???
            # NOTE(review): this silently drops any post whose attribute
            # access fails — consider logging before swallowing.
            pass
def testContentTypeApplicationXml(self):
    """Should accept HTTP content-type application/xml."""
    parsed = tumblr.parse(self.urlContentTypeApplicationXml)
    # Any successful attribute check proves the content type was accepted.
    assert parsed.name == u'demo'