def EPISOADE(continut, sezon):
    # Each row looks like:
    # <td class="ep">Episodul N</td><td class="tf">Name</td><td class="ad"><a href="...">
    match = re.compile('<td class="ep">(.+?)</td><td class="tf">(.+?)</td><td class="ad"><a href="(.+?)".+?target=', re.IGNORECASE).findall(continut)
    for episod, nume, legatura in match:
        legatura = legatura.replace(" ", "%20")  # URL-encode spaces in the link
        episod = htmlcleaner.clean(episod, strip=False)
        nume = htmlcleaner.clean(nume, strip=False)
        nume = nume.replace('<font color="#000">', "")
        nume = nume.replace('</font>', "")
        if 'Episodul ' in episod:
            episod = episod.replace('Episodul ', "EP")
        if 'Episod ' in episod:
            episod = episod.replace('Episod ', "EP")
        sezon = sezon.strip()
        if 'Sezonul ' in sezon:
            sezon = sezon.replace('Sezonul ', "S0")
        titlu = sezon + ' ' + episod + ' - ' + nume
        addDirSort(titlu, legatura, 8, '')
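# A minimal sketch of the title transformation above, using made-up values
# ('Sezonul 8', 'Episodul 3' and 'Pilot' are hypothetical, not scraped data):
def _demo_episoade_title():
    episod = 'Episodul 3'.replace('Episodul ', 'EP')       # 'EP3'
    sezon = 'Sezonul 8'.strip().replace('Sezonul ', 'S0')  # 'S08'
    return sezon + ' ' + episod + ' - ' + 'Pilot'          # 'S08 EP3 - Pilot'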
def CLEANUP_FOR_META(name):
    # Cleaner for when using a name for a metadata lookup.
    # These legacy entity substitutions are left in even though they are not
    # really needed thanks to htmlcleaner; they might help with some meta lookups.
    name = re.sub('&#39;', "'", name)
    name = re.sub('&amp;', '&', name)
    name = re.sub('&AElig;', 'AE', name)
    name = re.sub('&#8217;', "'", name)
    name = re.sub('&iacute;', 'i', name)
    name = re.sub('&frac12;', ' 1/2', name)
    name = re.sub('&#189;', ' 1/2', name)
    name = re.sub('&#38;', '&', name)
    name = re.sub('&quot;', '', name)
    name = re.sub('&ocirc;', 'o', name)
    name = re.sub('&eacute;', 'e', name)
    name = re.sub('&euml;', 'e', name)
    name = re.sub('&oslash;', 'o', name)
    name = re.sub('&acirc;', 'a', name)
    name = re.sub('&ucirc;', 'u', name)
    name = re.sub('&#039;', "'", name)
    name = re.sub('&aacute;', 'a', name)
    name = re.sub('&uuml;', 'u', name)
    # Run the unicode cleaner, but strip unicode to ASCII.
    name = htmlcleaner.clean(name, strip=True)
    return name
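# A hedged usage sketch for the substitutions above (the title is made up, and
# htmlcleaner's final pass is skipped, so only the re.sub step is shown):
def _demo_cleanup_for_meta():
    name = 'Caf&eacute; &amp; Bar&#39;s'
    name = re.sub('&eacute;', 'e', name)
    name = re.sub('&amp;', '&', name)
    name = re.sub('&#39;', "'", name)
    return name  # "Cafe & Bar's"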
def nameCleaner(name):
    name = name.replace('–', '')         # strip en dashes
    name = name.replace('’', '')         # strip curly apostrophes
    name = name.replace('&#39;s', "'s")  # decode possessive apostrophes
    name = unicode(name, errors='ignore')
    name = htmlcleaner.clean(name, strip=True)
    return name
def Episodes(url, page):
    page = int(page) + 1
    DOTCOM, THEPAGE, API = GetLang()
    headers = {'Referer': 'http://www.nickjr.%s' % DOTCOM,
               'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13 (.NET CLR 3.5.30729)'}
    link = net.http_GET('http://www.nickjr.%s/data/property%s.json?&urlKey=%s&apiKey=%s&page=%s'
                        % (DOTCOM, THEPAGE, url, API, page), headers=headers).content
    link = json.loads(link)
    data = link['stream']
    for k in data:
        for w in k['items']:
            try:
                try:
                    URL = w['data']['id']
                except:
                    URL = None
                try:
                    duration = ' - [' + w['data']['duration'] + ']'
                except:
                    duration = ''
                try:
                    name = w['data']['title'] + duration
                except:
                    try:
                        name = htmlcleaner.cleanUnicode(w['data']['title']) + duration
                    except:
                        try:
                            name = htmlcleaner.clean(w['data']['title']) + duration
                        except:
                            name = ''
                try:
                    iconimage = w['data']['images']['thumbnail']['r1-1']
                except:
                    try:
                        iconimage = w['data']['images']['thumbnail']['r25-12']
                    except:
                        iconimage = ''
                try:
                    plot = htmlcleaner.cleanUnicode(w['data']['description'])
                except:
                    plot = ''
                if URL:
                    addDir(name, URL, 200, iconimage, plot)
            except:
                pass
    if data:
        addDir('Next Page >>', url, 1, '', str(page))
    setView('movies', 'episode-view')
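# The feed shape Episodes expects, inferred from the lookups above (key names
# come from the code; the values shown are illustrative placeholders only):
# {"stream": [{"items": [{"data": {
#     "id": "...", "title": "...", "duration": "...", "description": "...",
#     "images": {"thumbnail": {"r1-1": "...", "r25-12": "..."}}}}]}]}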
def SERIALE(url):
    link = get_url(url)
    match = re.compile('<li><a.+?href="(.+?.php)" title=".+?">(.+?)</a></li>', re.IGNORECASE).findall(link)
    leg = list(set(match))  # drop duplicate menu entries
    for legatura, nume in leg:
        nume = nume.strip()
        nume = htmlcleaner.clean(nume, strip=False)
        the_link = 'http://www.serial-online.ro/' + legatura
        addDirSort(nume, the_link, 6, '')  # was mode 7
def SEZOANE(url, name):
    link = get_url(url)
    # Season headers look like:
    # <h3 align="center" style="color:#000;">Supernatural 2005 online - sezonul 8</h3>
    match = re.compile('<h3 align="center" style="color:#000;">(.+?)<', re.IGNORECASE).findall(link)
    content = link.split('<h3 align="center" style="color:#000;">')
    for x in range(1, len(content)):
        sezon = match[x - 1].replace(' online - s', ' - S')
        if " »" in sezon:
            sezon = sezon.replace(' »', '')
        sezon = htmlcleaner.clean(sezon)
        continut = content[x]  # the HTML chunk between this header and the next
        addDirSort(sezon, continut, 7, '')
def FILME_INFO(url):
    raw = get_url(url)
    # Work out the paging scheme: the last page link gives the URL stem and the page count.
    cpag = raw.split("<div id='numarpagini'>")[1]
    cpag = cpag.split("</div>")[0]
    match = re.compile('<a.+?href="(.+?)">([0-9]+)</a>.+?', re.IGNORECASE).findall(cpag)
    match.reverse()
    masterlink = re.compile('(.+?)[0-9]+.html', re.IGNORECASE).findall(match[0][0])[0]
    maxpag = match[0][1]
    # Keep only the left column, which holds the movie list.
    content = raw.split("<div align='center' id='coloana-stanga'>")[1]
    content = content.split("<div align='center' id='coloana-dreapta'>")[0]
    filme = re.compile('width:630px(.+?)<iframe', re.DOTALL).findall(content)
    for film in filme:
        inf = re.compile("<div.+?><a href='(.+?)'><img src='..(.+?.jpg)'.+'link'>(.+?)</a> \(([0-9]+)\).+<div align='left'.+? font:14px Tahoma;'>(.+?)<.+?<img src='images/nota.png' alt='nota (.+?)'.+Gen:(.+?)</div>.+weight:bold;'>(.+?)</div>", re.DOTALL).findall(film)
        for link, img, titlu, an, titlu_ro, nota, gen, descriere in inf:
            link = 'http://www.990.ro/' + link
            img = 'http://www.990.ro' + img
            titlu = htmlcleaner.clean(titlu, True)
            titlu_ro = htmlcleaner.clean(titlu_ro).replace("\n", "").replace("\t", "")
            gen = htmlcleaner.clean(gen).replace("\n", "").replace("\t", "")
            descriere = htmlcleaner.clean(descriere).replace("\n", "").replace("\t", "")
            nota = float(nota) / 2  # halve the site's 0-10 rating for the listing
            addDir2(titlu, link, 4, img, descriere, img, nota, int(an))
    # Next page: a full page of 20 movies means there are probably more results.
    match = re.compile('.+?-([0-9]+).html', re.IGNORECASE).findall(url)
    nr_pagina = match[0]
    if len(filme) == 20:
        addNext('Pagina ' + str(int(nr_pagina) + 1) + ' din ' + maxpag,
                'http://www.990.ro/' + masterlink + str(int(nr_pagina) + 1) + '.html', 1, next_thumb)
def cleanName(self, s, remove_year=False):
    # Optionally drop a trailing ' (YYYY)' year suffix before cleaning.
    if remove_year and re.search('\\(\\d\\d\\d\\d\\)$', s):
        s = s[0:len(s) - 7]
    s = htmlcleaner.clean(s, strip=True)
    s = s.strip()
    return s
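# A hedged usage sketch for cleanName (the receiver object is hypothetical;
# the method lives on whatever class defines it in this repo):
#   obj.cleanName('Casablanca (1942)', remove_year=True)  # -> 'Casablanca'
# The slice works because ' (YYYY)' is exactly seven characters long.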
def AFnameCleaner(name):
    name = name.replace('–', '')         # strip en dashes
    name = name.replace('’', '')         # strip curly apostrophes
    name = name.replace('&#39;s', "'s")  # decode possessive apostrophes
    name = htmlcleaner.clean(name, strip=True)
    return name