예제 #1
0
	def downloadReq(self, view, download):
		"""
		Signal handler for right click, save-something.

		Reads the URI from the download object and passes it to the
		opener's downloadFile(). The view argument is not used.
		"""
		source = download.get_uri()
		start_download = self.opener.downloadFile
		start_download(source, "unknown", "", type_of(source), source, source)
예제 #2
0
    def downloadReq(self, view, download):
        """
        Signal handler for right click, save-something.

        Takes the URI of the requested download and forwards it to
        self.opener.downloadFile(). The view argument is ignored.
        """
        target = download.get_uri()
        self.opener.downloadFile(
            target,
            "unknown",
            "",
            type_of(target),
            target,
            target,
        )
예제 #3
0
	def __init__(self, url, contentType, source):
		"""
		Parse a fetched page and collect the media items it describes.

		The source is first parsed as XML. For an rss root, every <item>
		element is passed to addItem(); any other XML root is handed to
		seeXMLElement(). If anything on the XML path raises, or the root
		is an html or Atom feed element, the source is parsed with
		lxml.html instead and handed to seeHTMLElement().

		url -- stored as self.url.
		contentType -- stored as self.contentType.
		source -- page text; stored as self.source after the characters
		          matched by the clean-up pattern are replaced by spaces.
		"""
		# Initialized each time...
		self.Redirect = "" # URL to redirect to.
		self.Title = ""
		self.HTML = "" # The description-html, top panel.
		self.itemId = "" # The specific item selected.
		self.singleItem = False
		self.NormalPage = False
		self.podcast = ""
		self.bgcolor = ""
		self.mediaItems = [] #List of items to place in Liststore.
		self.tabMatches = []
		self.tabLinks = []
		self.last_text = "" # prevent duplicates from shadow-text.

		self.url = url
		self.contentType = contentType
		self.source = source
		try: #parse as xml
			# Remove bad XML. See:
			# http://stackoverflow.com/questions/1016910/how-can-i-strip-invalid-xml-characters-from-strings-in-perl
			# NOTE(review): in this literal \xD7FF is the character \xD7
			# followed by "FF" (likewise \xE000, \xFFFD), not a four-digit
			# code point. Left untouched because changing it depends on
			# whether source is a byte string or unicode -- TODO confirm.
			bad = "[^\x09\x0A\x0D\x20-\xD7FF\xE000-\xFFFD]"
			self.source = re.sub(bad, " ", self.source) # now it should be valid xml.
			source_cleaned = self.source.replace('xmlns="http://www.apple.com/itms/"', '') #(this xmlns causes problems with xpath)
			dom = fromstring(source_cleaned)
			if dom.tag.find("html") > -1 or dom.tag == "{http://www.w3.org/2005/Atom}feed":
				# Don't want normal pages/atom pages, those are for the web browser!
				# Raising sends control to the HTML fallback below.
				raise Exception("html/atom page, handled by the HTML parser")
			elif dom.tag == "rss": # rss files are added
				self.HTML += "<p>This is a podcast feed, click Add to Podcast manager button on the toolbar to subscribe.</p>"
				items = dom.xpath("//item")
				logging.debug("rss: %d", len(items))
				for item in items:
					title = ""
					author = ""
					linkurl = ""
					duration = ""
					url = ""
					description = ""
					pubdate = ""
					for i in item:
						tag = i.tag
						if callable(tag):
							# Comments and processing instructions have a
							# factory function as .tag; calling endswith()
							# on it would raise and abort the whole feed.
							continue
						# Empty elements have text None; keep the "" default.
						text = i.text or ""
						if tag == "title":
							title = text
						elif tag.endswith("author"):
							author = text
						elif tag == "link":
							linkurl = text
						elif tag == "description":
							description = text
						elif tag == "pubDate":
							pubdate = text
						elif tag == "enclosure":
							# An enclosure without a url attribute yields None.
							url = i.get("url") or ""
						elif tag.endswith("duration"):
							duration = text
					self.addItem(
						title,
						author,
						duration,
						type_of(url),
						description,
						pubdate,
						"",
						linkurl,
						url,
						"",
						"",
					)
			else:
				self.seeXMLElement(dom)
		except Exception as e:
			# Any failure above (parse error or the deliberate raise) ends
			# up here and the page is treated as HTML.
			logging.debug("ERR: %s", e)
			logging.debug("Parsing as HTML, not as XML.")
			marker = "<body onload=\"return open('"
			ustart = self.source.find(marker)
			if ustart > -1:  # This is a redirect-page.
				# The redirect target is the text between the marker and
				# the next single quote.
				begin = ustart + len(marker)
				newU = self.source[begin:self.source.find("'", begin)]
				self.Redirect = newU
			logging.debug("Parsing HTML")
			self.HTML = self.source
			source_cleaned = self.source.replace('<html xmlns="http://www.apple.com/itms/"', '<html')
			dom = lxml.html.document_fromstring(source_cleaned)
			self.seeHTMLElement(dom)
예제 #4
0
	def seeHTMLElement(self, element):
		"""
		Walk an HTML element tree and add a row for each media element.

		Elements whose tag is not a string are ignored, as are elements
		(together with their children) whose "comparison" attribute is
		"lt" or contains "less". A row is added through addItem() for:
		  - a <tr> carrying JSON in "dnd-clipboard-data",
		  - any element with an audio or video preview url,
		  - a <button> with an anonymous download url, a kind, and a
		    title or item-name,
		  - a <button> with an episode url.
		Any other element has its children visited recursively.
		"""
		if not isinstance(element.tag, str):
			return # not a normal element

		comparison = element.get("comparison")
		if comparison == "lt" or (comparison and "less" in comparison):
			return #Ignore child nodes.

		clipboard = element.get("dnd-clipboard-data")
		if element.tag == "tr" and clipboard:
			record = json.loads(clipboard)
			row = {
				"title": "",
				"artist": "",
				"duration": "",
				"url": "",
				"comment": "",
				"gotou": "",
				"price": "0",
				"itemid": "",
			}
			# (json key, row slot) pairs, checked in this order.
			wanted = (
				("itemName", "title"),
				("artistName", "artist"),
				("duration", "duration"),
				("preview-url", "url"),
				("playlistName", "comment"),
				("url", "gotou"),
				("price", "price"),
				("itemId", "itemid"),
			)
			for key, slot in wanted:
				if key in record:
					value = record[key]
					if key == "duration":
						value = time_convert(value)
					row[slot] = value
			self.addItem(
				row["title"],
				row["artist"],
				row["duration"],
				type_of(row["url"]),
				row["comment"],
				"",
				"",
				row["gotou"],
				row["url"],
				row["price"],
				row["itemid"],
			)
			return

		audio = element.get("audio-preview-url")
		video = element.get("video-preview-url")
		if audio or video:
			# The video preview wins when both are present.
			url = video if video else audio
			title = element.get("preview-title") or ""
			author = element.get("preview-artist") or ""
			length = element.get("preview-duration")
			duration = time_convert(length) if length else ""
			logging.debug("preview-url adding row")
			self.addItem(title, author, duration, type_of(url), "", "", "", "", url, "", "")
			return

		if element.tag == "button":
			download = element.get("anonymous-download-url")
			named = element.get("title") or element.get("item-name")
			if download and element.get("kind") and named: #Added for epub feature
				logging.debug("button row adding")
				# item-name takes precedence over title.
				title = element.get("item-name") or element.get("title") or ""
				artist = element.get("preview-artist") or ""
				self.addItem(
					title,
					artist,
					"",
					type_of(download),
					"",
					"",
					"",
					download,
					"",
					"",
					element.get("adam-id"),
				)
				return

			episode = element.get("episode-url")
			if episode:
				title = element.get("aria-label") or ""
				if title.startswith("Free Episode, "):
					title = title[14:]
				artist = element.get("artist-name") or ""
				mytype = type_of(episode)
				if element.get("disabled") is not None:
					mytype = ".zip" # wrong ext. fix it.
				self.addItem(title, artist, "", mytype, "", "", "", "", episode, "", "")
				return

		# Nothing matched: go through the childnodes.
		for child in element:
			self.seeHTMLElement(child)
예제 #5
0
class ParserBase(object):
	def __init__(self, url, contentType, source):
		# Initialized each time...
		self.Redirect = "" # URL to redirect to.
		self.Title = ""
		self.HTML = "" # The description-html, top panel.
		self.itemId = "" # The specific item selected.
		self.singleItem = False
		self.NormalPage = False
		self.podcast = ""
		self.bgcolor = ""
		self.mediaItems = [] #List of items to place in Liststore.
		self.tabMatches = []
		self.tabLinks = []
		self.last_text = "" # prevent duplicates from shadow-text.

		self.url = url
		self.contentType = contentType
		self.source = source
		sttime = time.time()
		try: #parse as xml
			# Remove bad XML. See:
			# http://stackoverflow.com/questions/1016910/how-can-i-strip-invalid-xml-characters-from-strings-in-perl
			bad = "[^\x09\x0A\x0D\x20-\xD7FF\xE000-\xFFFD]"
			self.source = re.sub(bad, " ", self.source) # now it should be valid xml.
			source_cleaned = self.source.replace('xmlns="http://www.apple.com/itms/"', '') #(this xmlns causes problems with xpath)
			dom = fromstring(source_cleaned)
			if dom.tag.find("html") > -1 or dom.tag == "{http://www.w3.org/2005/Atom}feed":
				# Don't want normal pages/atom pages, those are for the web browser!
				raise Exception
			elif dom.tag == "rss": # rss files are added
				self.HTML += "<p>This is a podcast feed, click Add to Podcast manager button on the toolbar to subscribe.</p>"
				items = dom.xpath("//item")
				logging.debug("rss: " + str(len(items)))
				for item in items:
					title = ""
					author = ""
					linkurl = ""
					duration = ""
					url = ""
					description = ""
					pubdate = ""
					for i in item:
						if i.tag == "title":
							title = i.text
						elif i.tag == "author" or i.tag.endswith("author"):
							author = i.text
						elif i.tag == "link":
							linkurl = i.text
						elif i.tag == "description":
							description = i.text
						elif i.tag == "pubDate":
							pubdate = i.text
						elif i.tag == "enclosure":
							url = i.get("url")
						elif i.tag.endswith("duration"):
							duration = i.text
					self.addItem(title,
						     author,
						     duration,
						     type_of(url),
						     description,
						     pubdate,
						     "",
						     linkurl,
						     url,
						     "",
						     "")
			else:
				self.seeXMLElement(dom)
		except Exception, e:
			logging.debug("ERR: " + str(e))
			logging.debug("Parsing as HTML, not as XML.")
			ustart = self.source.find("<body onload=\"return open('")
			if ustart > -1:  # This is a redirect-page.
				newU = self.source[ustart+27:self.source.find("'", ustart+27)]
				self.Redirect = newU
			logging.debug("Parsing HTML")
			self.HTML = self.source
			source_cleaned = self.source.replace('<html xmlns="http://www.apple.com/itms/"', '<html')
			dom = lxml.html.document_fromstring(source_cleaned)
			self.seeHTMLElement(dom)

		items = []
		arr = self.getItemsArray(dom) # get the tracks list element

		keys = dom.xpath("//key") # important parts of document! this is only calculated once to save time
		# Now get location path:
		# location description and links and last location in location bar.
		location = []
		locationLinks = []
		lastloc = ""
		locationelements = dom.xpath("//Path")
		if len(locationelements) > 0:
			for i in locationelements[0]:
				if (type(i).__name__ == '_Element' and i.tag == "PathElement"):
					location.append(i.get("displayName"))
					locationLinks.append(i.text)

		if location == ["iTunes U"]:
			section = dom.xpath("//HBoxView") # looking for first section with location info.
			if len(section) > 0: # may be out of range
				section = section[0]
				for i in section:
					if (type(i).__name__ == '_Element'):
						for j in i:
							if type(j).__name__ == '_Element' and j.tag == "GotoURL":
								location.append(j.text.strip())
								locationLinks.append(j.get("url"))
								logging.debug(j.text.strip() + j.get("url"))
								lastloc = j.get("url")
				if self.textContent(section).find(">") > -1:
					section.getparent().remove(section) # redundant section > section ... info is removed.

		if arr is None:
			ks = dom.xpath("/Document/Protocol/plist/dict/array/dict")
			if len(ks):
				arr = ks
				logging.debug("Special end page after html link?" + str(len(ks)))
				if (len(ks) == 1 and
				    dom.get("disableNavigation") == "true" and
				    dom.get("disableHistory") == "true"):
					self.singleItem = True
		logging.debug("tag " + dom.tag)

		if arr is None: # No tracklisting.
			hasmedia = False
			if len(self.mediaItems) == 0:
				logging.debug("nothing here!")
		else: # add the tracks:
			# TODO: Add XML page's elements to the top panel, so the bottom panel isn't necessary.
			hasmedia = True
			# for each item...
			for i in arr:
				if type(i).__name__ == '_Element' and i.tag == "dict":
					# for each <dict> track info....</dict> get this information:
					name = ""
					artist = ""
					duration = ""
					comments = ""
					rtype = ""
					url = ""
					directurl = ""
					releaseDate = ""
					modifiedDate = ""
					id = ""
					for j in i:
						if j.tag == "key": # get each piece of data:
							if j.text in ["songName", "itemName"]:
								t = j.getnext().text
								if t:
									name = t
							elif j.text == "artistName":
								t = j.getnext().text
								if t:
									artist = t
							elif j.text == "duration":
								t = j.getnext().text
								if t:
									duration = t
							elif j.text in ["comments", "description", "longDescription"]:
								t = j.getnext().text
								if t:
									comments = t
							elif j.text == "url":
								t = j.getnext().text
								if t:
									url = t
							# Added Capital "URL", for the special case end page after html link.
							elif j.text in ["URL", "previewURL", "episodeURL", "preview-url"]:
								t = j.getnext().text
								if t:
									directurl = t
							elif j.text == "explicit":
								el = j.getnext()
								if el.text == "1":
									rtype = "[Explicit] "
								if el.text == "2":
									rtype = "[Clean] "
							elif j.text == "releaseDate":
								t = j.getnext().text
								if t:
									releaseDate = t
							elif j.text == "dateModified":
								t = j.getnext().text
								if t:
									modifiedDate = t
							elif j.text == "itemId":
								t = j.getnext().text
								if t:
									id = t
							elif j.text == "metadata": # for the special case end page after html link
								i.extend(j.getnext().getchildren()) # look inside this <dict><key></key><string></string>... also.
					self.addItem(name,
						     artist,
						     time_convert(duration),
						     type_of(directurl),
						     rtype + comments,
						     self.formatTime(releaseDate),
						     self.formatTime(modifiedDate),
						     url,
						     directurl,
						     "",
						     id)

		# Now put page details in the detail-box on top.
		if dom.tag == "rss":
			out = ""
			image = dom.xpath("/rss/channel/image/url")
			if len(image) > 0:
				# get recommended width, height:
				w, h = None, None
				try:
					w = dom.xpath("/rss/channel/image/width")[0].text
					h = dom.xpath("/rss/channel/image/height")[0].text
				except:
					pass
				self.HTML += self.imgText(image[0].text, h, w)
			#else: # TODO: fix this namespace problem
				#image = dom.xpath("/rss/channel/itunes:image",namespaces={'itunes': 'http://www.itunes.com/DTDs/Podcast-1.0.dtd'})[0]
				#if len(image)>0...
			channel = dom.xpath("/rss/channel")
			if len(channel):
				for i in channel[0]:
					if not(image) and i.tag == "{http://www.itunes.com/dtds/podcast-1.0.dtd}image":
						self.HTML += self.imgText(i.get("href"), None, None)
				for i in channel[0]:
					if i.text and i.text.strip() != "" and isinstance(i.tag, str):
						thisname = "".join(i.tag.replace("{", "}").split("}")[::2]) # remove {....dtd} from tag
						self.HTML += "<b>%s:</b> %s\n<br>" % (thisname, i.text)
				try:
					self.Title = (dom.xpath("/rss/channel/title")[0].text)
				except IndexError, e:
					logging.warn('Error using index ' + str(e))