def getProgramDirs(self, category=''):
    """Return the program directory as a list of {'label', 'id'} dicts.

    With an empty ``category`` the alphabetical list ('programs-by-name')
    is used as-is; otherwise the typed list ('programs-by-type') is
    filtered down to the <li> rows belonging to that category.
    """
    if category == '':
        ret = common.parseDOM(self.getPage(self.URL['programdir']), "div", {'id': 'programs-by-name'})
        ret = common.parseDOM(ret, "ul", {'class': 'all-programs-list'})
    else:
        ret = common.parseDOM(self.getPage(self.URL['programdir']), "div", {'id': 'programs-by-type'})
        ret = common.parseDOM(ret, "ul", {'class': 'all-programs-list'})
        # Keep only the rows between this category's '<li class="initial">'
        # header and the next header line.
        out = []
        found = False
        for result in ret[0].split('\r'):
            if found:
                if not '<li class="initial"' in result:
                    out.append(result)
                else:
                    found = False
            if '<li class="initial">' + category + '</li>' in result:
                found = True
        ret = out
    retIDs = common.parseDOM(ret, "li", ret="data-id")
    retNames = common.parseDOM(ret, "li")
    programdirs = []
    # zip instead of index loop; 'prog_id' avoids shadowing the builtin 'id'.
    for prog_id, raw_name in zip(retIDs, retNames):
        name = common.stripTags(raw_name)
        # Entries whose raw markup carries a 'star' marker get an asterisk.
        if 'star' in raw_name:
            name = name + " *"
        programdirs.append({'label': name, 'id': prog_id})
    return programdirs
def getLaneTeasers(self, html):
    """Create one list item per topic teaser found in a lane page.

    ``html`` is a fetchPage() result dict; each teaser links to mode
    'getArchiveDetail'. Returns nothing — items are emitted via
    html2ListItem().
    """
    items = common.parseDOM(html.get("content"),name='article',attrs={'class': "b-topic-teaser"},ret=False)
    lane_title = common.parseDOM(html.get("content"),name='h3',attrs={'class': "title"},ret=False)
    lane_title = common.replaceHTMLCodes(lane_title[0]).encode('UTF-8')
    lane_title = common.stripTags(lane_title)
    for item in items:
        title = common.parseDOM(item,name='h5',attrs={'class': "teaser-title.*?"},ret=False)
        title = common.replaceHTMLCodes(title[0]).encode('UTF-8')
        title = "[%s] %s" % (lane_title,title)
        # The video count paragraph doubles as the description text.
        video_count = common.parseDOM(item,name='p',attrs={'class': "topic-video-count"},ret=False)
        desc = common.replaceHTMLCodes(video_count[0]).encode('UTF-8')
        figure = common.parseDOM(item,name='figure',attrs={'class':'teaser-img'},ret=False)
        image = common.parseDOM(figure,name='img',attrs={},ret='src')
        image = common.replaceHTMLCodes(image[0]).encode('UTF-8')
        # Fix: the link was previously parsed twice, the first result being
        # dead code that was immediately overwritten. Parse it once.
        link = common.parseDOM(item,name='a',ret='href')
        link = link[0].encode('UTF-8')
        link = "%s%s" % (self.__urlBase,link)
        desc = self.formatDescription(title,"","",desc,"","")
        parameters = {"link" : link, "banner" : image, "mode" : "getArchiveDetail"}
        url = sys.argv[0] + '?' + urllib.parse.urlencode(parameters)
        self.html2ListItem(title,image,"",desc,"","","",url,None,True, False)
def get_episodes(series_id, season_id):
    """Build program listings for all playable episodes of one season.

    Episodes whose <li> class contains "no-rights" are skipped. Thumb and
    fanart URLs are derived from the series id and repeated per episode.
    """
    endpoint = "http://tv.nrk.no/program/Episodes/%s/%s" % (series_id, season_id)
    markup = session.get(endpoint).text
    episode_lists = parseDOM(markup, 'ul', {'class': 'episode-list'})
    assert len(episode_lists) == 1
    li_classes = parseDOM(episode_lists, 'li', ret='class')
    all_items = parseDOM(episode_lists, 'li')
    playable = [item for item, cls in zip(all_items, li_classes)
                if "no-rights" not in cls]
    titles = [html_decode(common.stripTags(parseDOM(item, 'h3')[0]))
              for item in playable]
    urls = [parseDOM(item, 'a', ret='href')[0] for item in playable]
    descriptions = [html_decode(common.stripTags(parseDOM(item, 'p')[0]))
                    for item in playable]
    return _programs_from_lists(titles, urls,
                                repeat(_thumb_url(series_id)),
                                repeat(_fanart_url(series_id)),
                                descriptions)
def getLiveStreams(self):
    """Scrape the start page for live-stream teasers and emit one
    livestream entry per channel (or per Bundesland for the regional
    'heute' channel) via buildLivestream()."""
    html = common.fetchPage({'link': self.__urlBase})
    # Alternate test page kept for local debugging:
    #html = common.fetchPage({'link': "https://office.lo-fi.at/tmp/"})
    wrapper = common.parseDOM(html.get("content"),name='main',attrs={'class': 'main'})
    section = common.parseDOM(wrapper,name='section',attrs={'class': 'b-live-program.*?'})
    items = common.parseDOM(section,name='li',attrs={'class': 'channel orf.*?'})
    for item in items:
        # Channel name comes from the logo's alt text.
        channel = common.parseDOM(item,name='img',attrs={'class': 'channel-logo'},ret="alt")
        channel = common.replaceHTMLCodes(channel[0]).encode('UTF-8')
        # Regional channels ship their per-Bundesland streams as a JSON
        # blob in data-jsb; ordinary channels have a teaser article.
        bundesland_article = common.parseDOM(item,name='li',attrs={'class': '.*?is-bundesland-heute.*?'},ret='data-jsb')
        article = common.parseDOM(item,name='article',attrs={'class': 'b-livestream-teaser.*?'})
        if not len(bundesland_article) and len(article):
            figure = common.parseDOM(article,name='figure',attrs={'class':'teaser-img'},ret=False)
            image = common.parseDOM(figure,name='img',attrs={},ret='data-src')
            image = common.replaceHTMLCodes(image[0]).encode('UTF-8')
            # Normalize "20.15 Uhr" style times to "20:15".
            time = common.parseDOM(article,name='h4',attrs={'class': 'time'},ret=False)
            time = common.replaceHTMLCodes(time[0]).encode('UTF-8').replace("Uhr","").replace(".",":").strip()
            # NOTE(review): second .encode on an already-encoded value —
            # harmless for ASCII times, but looks like a py2 leftover.
            time = common.stripTags(time).encode('UTF-8')
            title = common.parseDOM(article,name='h4',attrs={'class': 'livestream-title.*?'})
            title = common.replaceHTMLCodes(title[0]).encode('UTF-8')
            link = common.parseDOM(item,name='a',attrs={'class': 'js-link-box'},ret="href")
            link = common.replaceHTMLCodes(link[0]).encode('UTF-8')
            # NOTE(review): 'data' is parsed but never used in this branch.
            data = common.parseDOM(item,name='a',attrs={'class': 'js-link-box'},ret="data-jsb")
            data = common.replaceHTMLCodes(data[0]).encode('UTF-8')
            data = json.loads(data)
            # Presence of the status/restart spans toggles the flags.
            online = common.parseDOM(article,name='span',attrs={'class': 'status-online'})
            if len(online):
                online = True
            else:
                online = False
            restart = common.parseDOM(article,name='span',attrs={'class': 'is-restartable'})
            if len(restart):
                restart = True
            else:
                restart = False
            self.buildLivestream(title,link,time,restart,channel,image,online)
        elif len(bundesland_article):
            # One livestream per Bundesland, taken from the JSON payload;
            # regional streams are always restartable and online here.
            bundesland_data = common.replaceHTMLCodes(bundesland_article[0]).encode('UTF-8')
            bundesland_data = json.loads(bundesland_data)
            for bundesland_item_key in bundesland_data:
                bundesland_item = bundesland_data.get(bundesland_item_key)
                bundesland_title = bundesland_item.get('title').encode('UTF-8')
                bundesland_image = bundesland_item.get('img').encode('UTF-8')
                bundesland_link = bundesland_item.get('url').encode('UTF-8')
                self.buildLivestream(bundesland_title,bundesland_link,"",True,channel,bundesland_image,True)
def getLaneItems(self, url):
    """List every teaser item of a lane page as a directory entry.

    Falls back to getLaneTeasers() when the page has no plain 'b-teaser'
    articles. Each item opens with mode 'openSeries'.
    """
    html = common.fetchPage({'link': url})
    items = common.parseDOM(html.get("content"),name='article',attrs={'class': "b-teaser"},ret=False)
    if len(items) < 1:
        # No plain teasers on this lane -> it is a topic lane.
        self.getLaneTeasers(html)
    else:
        lane_title = common.parseDOM(html.get("content"),name='h3',attrs={'class': "title"},ret=False)
        lane_title = common.replaceHTMLCodes(lane_title[0]).encode('UTF-8')
        lane_title = common.stripTags(lane_title)
        for item in items:
            subtitle = common.parseDOM(item,name='h4',attrs={'class': "profile"},ret=False)
            subtitle = common.replaceHTMLCodes(subtitle[0]).encode('UTF-8')
            title = common.parseDOM(item,name='h5',attrs={'class': "teaser-title.*?"},ret=False)
            title = common.replaceHTMLCodes(title[0]).encode('UTF-8')
            title = "[%s] %s" % (lane_title,title)
            desc = common.parseDOM(item,name='p',attrs={'class': "description.*?"},ret=False)
            if len(desc):
                desc = common.replaceHTMLCodes(desc[0]).encode('UTF-8')
            else:
                desc = ""
            channel = common.parseDOM(item,name='p',attrs={'class': "channel"},ret=False)
            if len(channel):
                channel = common.replaceHTMLCodes(channel[0]).encode('UTF-8')
            else:
                channel = ""
            date = common.parseDOM(item,name='span',attrs={'class':'date'},ret=False)
            date = date[0].encode('UTF-8')
            time = common.parseDOM(item,name='span',attrs={'class':'time'},ret=False)
            time = time[0].encode('UTF-8')
            figure = common.parseDOM(item,name='figure',attrs={'class':'teaser-img'},ret=False)
            image = common.parseDOM(figure,name='img',attrs={},ret='src')
            image = common.replaceHTMLCodes(image[0]).encode('UTF-8')
            link = common.parseDOM(item,name='a',attrs={'class':'teaser-link.*?'},ret='href')
            link = link[0].encode('UTF-8')
            # Append the broadcast date to the title when one is present.
            if date != "":
                title = "%s - %s" % (title,date)
            desc = self.formatDescription(title,channel,subtitle,desc,date,time)
            parameters = {"link" : link, "banner" : image, "mode" : "openSeries"}
            # Fix: this statement was split across a broken line
            # continuation ("url = sys.argv[0] + '?'" / "+ urlencode(...)").
            url = sys.argv[0] + '?' + urllib.parse.urlencode(parameters)
            self.html2ListItem(title,image,"", desc,"","","",url,None,True, False)
def openArchiv(self, url):
    """Render the episode schedule of an archive day page as list items.

    ``url`` is a percent-encoded page URL; every schedule episode becomes
    one directory entry that opens with mode 'openSeries'.
    """
    page = common.fetchPage({'link': urllib.parse.unquote(url)})
    main_area = common.parseDOM(page.get("content"),name='main',attrs={'class': "main"},ret=False)
    schedule = common.parseDOM(main_area,name='div',attrs={'class': "b-schedule-list"},ret=False)
    episodes = common.parseDOM(schedule,name='article',attrs={'class': "b-schedule-episode.*?"},ret=False)
    day_title = common.parseDOM(schedule,name='h2',attrs={'class':'day-title.*?'},ret=False)
    # The day headline serves as the date in the formatted description.
    date = day_title[0].encode('UTF-8') if len(day_title) else ""
    for episode in episodes:
        raw_title = common.parseDOM(episode,name='h4',attrs={'class': "item-title.*?"},ret=False)
        title = common.replaceHTMLCodes(raw_title[0]).encode('UTF-8')
        raw_desc = common.parseDOM(episode,name='div',attrs={'class': "item-description.*?"},ret=False)
        desc = common.stripTags(common.replaceHTMLCodes(raw_desc[0]).encode('UTF-8')) if len(raw_desc) else ""
        raw_channel = common.parseDOM(episode,name='span',attrs={'class': "small-information.meta.meta-channel-name"},ret=False)
        channel = common.replaceHTMLCodes(raw_channel[0]).encode('UTF-8') if len(raw_channel) else ""
        start_time = common.parseDOM(episode,name='span',attrs={'class':'meta.meta-time'},ret=False)
        start_time = start_time[0].encode('UTF-8')
        # Prefix the title with the broadcast time; the time also doubles
        # as the subtitle line.
        title = "[%s] %s" % (start_time,title)
        subtitle = start_time
        figure = common.parseDOM(episode,name='figure',attrs={'class':'episode-image'},ret=False)
        image = common.parseDOM(figure,name='img',attrs={},ret='src')
        image = common.replaceHTMLCodes(image[0]).encode('UTF-8')
        target = common.parseDOM(episode,name='a',attrs={'class':'episode-content'},ret='href')
        target = target[0].encode('UTF-8')
        desc = self.formatDescription(title,channel,subtitle,desc,date,start_time)
        parameters = {"link" : target, "banner" : image, "mode" : "openSeries"}
        plugin_url = sys.argv[0] + '?' + urllib.parse.urlencode(parameters)
        self.html2ListItem(title,image,"",desc,"","","",plugin_url,None,True, False)
def get_search_results(query, page=0):
    """Search tv.nrk.no and return the hits as program listings.

    Absolute result links are rewritten to site-relative paths before
    deriving fanart URLs from them.
    """
    markup = session.get(
        "http://tv.nrk.no/sokmaxresults?q=%s&page=%s" % (query, page)).text
    hits = parseDOM(markup, 'li')
    titles = map(html_decode,
                 [parseDOM(hit, 'img', ret='alt')[0] for hit in hits])
    rel_urls = [parseDOM(hit, 'a', ret='href')[0].replace('http://tv.nrk.no', '')
                for hit in hits]
    descriptions = [html_decode(common.stripTags(parseDOM(hit, 'h3')[0]))
                    for hit in hits]
    thumbs = [parseDOM(hit, 'img', ret='src')[0] for hit in hits]
    fanart = [_fanart_url(u) for u in rel_urls]
    return _programs_from_lists(titles, rel_urls, thumbs, fanart, descriptions)
def getLinks(self, url, banner, playlist):
    """Resolve an episode page into playable playlist items.

    Parses the page's 'jsb_VideoPlaylist' JSON blob. With more than one
    video a play-all entry plus one item per video is created; with a
    single video only that item is added. Returns the filled ``playlist``
    or exits via sys.exit() when no playlist data is found.
    """
    url = str(urllib.parse.unquote(url))
    debugLog("Loading Videos from %s" % url,'Info')
    if banner != None:
        banner = urllib.parse.unquote(banner)
    html = common.fetchPage({'link': url})
    # The player metadata lives in a data-jsb JSON attribute; the visible
    # detail section is parsed separately for fallback text fields.
    data = common.parseDOM(html.get("content"),name='div',attrs={'class': "jsb_ jsb_VideoPlaylist"},ret='data-jsb')
    html_data = common.parseDOM(html.get("content"),name='section',attrs={'class': "b-video-details.*?"},ret=False)
    if len(data):
        try:
            data = data[0]
            data = common.replaceHTMLCodes(data)
            data = json.loads(data)
            current_preview_img = data.get("selected_video")["preview_image_url"]
            video_items = data.get("playlist")["videos"]
            current_title = data.get("selected_video")["title"]
            current_desc = ""
            current_channel = common.parseDOM(html_data,name='span',attrs={'class': "channel.*?"},ret='aria-label')
            if len(current_channel):
                current_channel = common.replaceHTMLCodes(current_channel[0]).encode('UTF-8')
            else:
                current_channel = ""
            current_date = common.parseDOM(html_data,name='span',attrs={'class':'date'},ret=False)
            current_date = current_date[0].encode('UTF-8')
            current_time = common.parseDOM(html_data,name='span',attrs={'class':'time'},ret=False)
            current_time = current_time[0].encode('UTF-8')
            current_subtitle = common.parseDOM(html_data,name='p',attrs={'class': "profile.*?"},ret=False)
            if len(current_subtitle):
                current_subtitle = common.stripTags(common.replaceHTMLCodes(current_subtitle[0])).encode('UTF-8')
            else:
                current_subtitle = ""
            # Prefer the JSON description; fall back to the page text.
            if data.get("selected_video")["description"]:
                current_desc = data.get("selected_video")["description"].encode('UTF-8')
            else:
                if len(html_data):
                    html_desc = common.parseDOM(html_data,name='p',attrs={'class': "description-text.*?"},ret=False)
                    current_desc = common.stripTags(common.replaceHTMLCodes(html_desc[0]).encode('UTF-8'))
            # Duration arrives in milliseconds; convert to whole seconds.
            if data.get("selected_video")["duration"]:
                current_duration = float(data.get("selected_video")["duration"])
                current_duration = int(current_duration / 1000)
            else:
                current_duration = 0
            if "subtitles" in data.get("selected_video"):
                current_subtitles = []
                for sub in data.get("selected_video")["subtitles"]:
                    current_subtitles.append(sub.get(u'src'))
            else:
                current_subtitles = None
            current_videourl = self.getVideoUrl(data.get("selected_video")["sources"]);
            current_desc = self.formatDescription(current_title,current_channel,current_subtitle,current_desc,current_date,current_time)
        except Exception as e:
            # Best-effort: keep going with whatever was parsed before the
            # failure. NOTE(review): if json.loads itself fails,
            # video_items/current_* below are unbound — presumably this
            # never happens for pages that have the data-jsb attribute;
            # verify.
            debugLog("Error Loading Episode from %s" % url,'Exception')
            #notifyUser((self.translation(30052)).encode("utf-8"))
            current_subtitles = None
        if len(video_items) > 1:
            # Multi-part episode: emit a play-all entry plus one item per
            # video; items that fail to parse are skipped, not fatal.
            play_all_name = "[ "+(self.translation(30015)).encode("utf-8")+" ]"
            debugLog("Found Video Playlist with %d Items" % len(video_items),'Info')
            createPlayAllItem(play_all_name,self.pluginhandle)
            for video_item in video_items:
                try:
                    title = video_item["title"].encode('UTF-8')
                    if video_item["description"]:
                        desc = video_item["description"].encode('UTF-8')
                    else:
                        debugLog("No Video Description for %s" % title,'Info')
                        desc = ""
                    # Milliseconds -> seconds, as above.
                    if video_item["duration"]:
                        duration = float(video_item["duration"])
                        duration = int(duration / 1000)
                    else:
                        duration = 0
                    preview_img = video_item["preview_image_url"]
                    sources = video_item["sources"]
                    if "subtitles" in video_item:
                        debugLog("Found Subtitles for %s" % title,'Info')
                        subtitles = []
                        for sub in video_item["subtitles"]:
                            subtitles.append(sub.get(u'src'))
                    else:
                        subtitles = None
                    videourl = self.getVideoUrl(sources);
                    liz = self.html2ListItem(title,preview_img,"",desc,duration,'','',videourl, subtitles,False, True)
                    playlist.add(videourl,liz)
                except Exception as e:
                    debugLog(str(e),'Error')
                    continue
            return playlist
        else:
            # Single video: add the selected video directly.
            debugLog("No Playlist Items found for %s. Setting up single video view." % current_title.encode('UTF-8'),'Info')
            liz = self.html2ListItem(current_title,current_preview_img,"",current_desc,current_duration,'','',current_videourl, current_subtitles,False, True)
            playlist.add(current_videourl,liz)
            return playlist
    else:
        # No playlist JSON on the page: notify the user and abort.
        notifyUser((self.translation(30052)).encode("utf-8"))
        sys.exit()