def download_subtitle(series_name, season, episode, output_dir):
    """Download English subtitles for one episode into its own directory.

    :param series_name: series name as known to the subtitle site
    :param season: season number (int)
    :param episode: episode number (int)
    :param output_dir: base directory; a per-episode subdirectory is created
    """
    dir_name = os.path.join(
        output_dir, "{}_s{:02}e{:02}".format(series_name, season, episode))
    # os.path.exists is the idiomatic existence check (was os.access F_OK).
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    # NOTE: chdir is a process-wide side effect; unzip below relies on it.
    os.chdir(dir_name)
    search_url = "{}{} s{:02}e{:02}".format(
        BASE_SEARCH, series_name, season, episode).replace(" ", "+")
    logging.info("Search url: {}".format(search_url))
    search_page_data = BeautifulSOAP(urllib2.urlopen(search_url).read())
    # TODO handle zero matches
    search_results_table = search_page_data.find(
        attrs={"class": "list first_column_title"})
    results = search_results_table.tbody.findAll("tr")
    # Keep only rows whose language cell is flagged as English subtitles.
    results = [x for x in results
               if x.findAll("td")[2].a.div["alt"] == "English subtitles"]
    results = [x.findAll("td")[0].findAll("div")[1].a["href"] for x in results]
    for result in results:
        download_page_url = BASE_URL + "/he/" + result
        download_page_data = urllib2.urlopen(download_page_url).read()
        download_button = BeautifulSOAP(download_page_data).find(
            attrs={"class": "button big download"})
        download_url = BASE_URL + download_button.get("href")
        download_data = urllib2.urlopen(download_url).read()
        zip_filename = "output.zip"
        # Close the archive before unzipping (fixes leaked file handle).
        with open(zip_filename, "wb") as zip_file:
            zip_file.write(download_data)
        subprocess.call(["unzip", "-o", zip_filename])
        logging.info("Unzipped {}".format(zip_filename))
        os.unlink(zip_filename)
def panel(url):
    """Add a directory entry for every series found in the n-th list block.

    :param url: numeric string selecting which "blok-liste" div to walk
    """
    # NOTE(review): `web` is not defined in this function — presumably a
    # module-level global holding the page URL; confirm, otherwise this
    # raises NameError at call time.
    link = araclar.get_url(web)
    soup = BS(link.decode('utf-8', 'ignore'))
    div = soup.findAll("div", {"class": "blok-liste"})
    # Finds the series front pages (original comment was in Turkish).
    for li in div[int(url)].findAll('li'):
        url = li.a['href']  # NOTE: rebinds the `url` parameter inside the loop
        name = li.a.text
        name = name.encode("utf-8")
        # NOTE(review): `fileName` is also undefined here — assumed global.
        araclar.addDir(fileName, name, "kategoriler(url)", url, "YOK")
def run(self): #resp = urllib2.urlopen(self.url) #print self.url, resp.getcode() req = urllib2.Request(url=self.url,headers=headers) content = urllib2.urlopen(req) soup = BeautifulSOAP(content,fromEncoding="gb18030") #print soup.originalEncoding #print soup.prettify() songlist = soup.findAll('a',{'href':re.compile(r'/song/(\d)+')}) #print dir(songlist[0]) for song in songlist: song_url='' song_url= 'www.xiami.com' + song.get('href') print song_url ,song.string
def getSoup(url, data=None): print 'getsoup', url, data if url.startswith('http://') or url.startswith('https://'): data = makeRequest(url) if re.search("#EXTM3U", data) or 'm3u' in url: print 'found m3u data', data return data elif data == None: if xbmcvfs.exists(url): if url.startswith("smb://") or url.startswith("nfs://"): copy = xbmcvfs.copy( url, os.path.join(profile, 'temp', 'sorce_temp.txt')) if copy: data = open( os.path.join(profile, 'temp', 'sorce_temp.txt'), "r").read() xbmcvfs.delete( os.path.join(profile, 'temp', 'sorce_temp.txt')) else: addon_log("failed to copy from smb:") else: data = open(url, 'r').read() if re.match("#EXTM3U", data) or 'm3u' in url: print 'found m3u data', data return data else: addon_log("Soup Data not found!") return return BeautifulSOAP(data, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
def getSoup(url): if url.startswith('http://'): data = makeRequest(url) else: if xbmcvfs.exists(url): if url.startswith("smb://"): copy = xbmcvfs.copy( url, xbmc.translatePath( os.path.join(profile, 'temp', 'sorce_temp.txt'))) if copy: data = open( xbmc.translatePath( os.path.join(profile, 'temp', 'sorce_temp.txt')), "r").read() xbmcvfs.delete( xbmc.translatePath( os.path.join(profile, 'temp', 'sorce_temp.txt'))) else: print "--- failed to copy from smb: ----" else: data = open(url, 'r').read() else: print "---- Soup Data not found! ----" return soup = BeautifulSOAP(data, convertEntities=BeautifulStoneSoup.XML_ENTITIES) return soup
def getSoup(url, data=None):
    """Fetch/read a stream source and return it as soup (or raw m3u text).

    Recognises control markers embedded in the URL ($$TSDOWNLOADER$$,
    $$HLSRETRY$$, $$LSProEncKey=...$$), strips them, and records them in
    module globals before downloading.

    :param url: http(s) URL, smb://, nfs://, local path, or bare cache name
    :param data: pre-fetched content; when None it is read from `url`
    :return: BeautifulSOAP instance, raw m3u text, or None when not found
    """
    global viewmode, tsdownloader, hlsretry
    tsdownloader = False
    hlsretry = False
    if url.startswith('http://') or url.startswith('https://'):
        enckey = False
        if '$$TSDOWNLOADER$$' in url:
            tsdownloader = True
            url = url.replace("$$TSDOWNLOADER$$", "")
        if '$$HLSRETRY$$' in url:
            hlsretry = True
            url = url.replace("$$HLSRETRY$$", "")
        if '$$LSProEncKey=' in url:
            # Extract the AES key marker and strip it from the real URL.
            enckey = url.split('$$LSProEncKey=')[1].split('$$')[0]
            rp = '$$LSProEncKey=%s$$' % enckey
            url = url.replace(rp, "")
        data = makeRequest(url)
        if enckey:
            import pyaes
            enckey = enckey.encode("ascii")
            print enckey
            # Zero-pad the key to 16 bytes (AES-128 block size).
            missingbytes = 16 - len(enckey)
            enckey = enckey + (chr(0) * (missingbytes))
            print repr(enckey)
            data = base64.b64decode(data)
            decryptor = pyaes.new(enckey, pyaes.MODE_ECB, IV=None)
            # Drop the trailing NUL padding after decryption.
            data = decryptor.decrypt(data).split('\0')[0]
        if re.search("#EXTM3U", data) or 'm3u' in url:
            return data
    elif data == None:
        # NOTE(review): with `or`, this is true for any url missing either
        # separator — presumably "no separator at all" (i.e. `and`) was
        # intended; confirm before changing.
        if not '/' in url or not '\\' in url:
            url = os.path.join(communityfiles, url)
        if xbmcvfs.exists(url):
            if url.startswith("smb://") or url.startswith("nfs://"):
                # Network shares: copy to a local temp file so open() works.
                copy = xbmcvfs.copy(url, os.path.join(profile, 'temp', 'sorce_temp.txt'))
                if copy:
                    data = open(os.path.join(profile, 'temp', 'sorce_temp.txt'), "r").read()
                    xbmcvfs.delete(os.path.join(profile, 'temp', 'sorce_temp.txt'))
                else:
                    addon_log("failed to copy from smb:")
            else:
                data = open(url, 'r').read()
            if re.match("#EXTM3U", data) or 'm3u' in url:
                return data
        else:
            addon_log("Soup Data not found!")
            return
    if '<SetViewMode>' in data:
        # Optional <SetViewMode> directive in the playlist switches the
        # Kodi container view before the soup is returned.
        try:
            viewmode = re.findall('<SetViewMode>(.*?)<', data)[0]
            xbmc.executebuiltin("Container.SetViewMode(%s)" % viewmode)
            print 'done setview', viewmode
        except:
            pass
    return BeautifulSOAP(data, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
def search(keyword, page, type_='all'):
    """POST a search query to the site and return the parsed result page.

    :param keyword: search term
    :param page: result page number (accepted for interface compatibility;
        not sent by this request — same as the original)
    :param type_: search category; sent to the server as the `type` field
    :return: BeautifulSOAP of the response body
    """
    url = base + 'search.php'
    # submit value is the URL-encoded Chinese label of the search button.
    formdata = dict(submit='%E6%90%9C+%E7%B4%A2', keyword=keyword, type=type_)
    response = requests.post(url, data=formdata)
    # Bug fix: the original parsed the Response object itself and then
    # discarded the soup; parse the body text and return it.
    return BeautifulSOAP(response.text)
def addToMyChannels(cname): try: fileName=os.path.join(profile_path, 'MyChannels.xml') print fileName MyChannelList=getSoup(fileName,True) except: MyChannelList=None if not MyChannelList: MyChannelList= BeautifulSOAP('<channels></channels>') val=MyChannelList.find("channel",{"cname":cname}) #print 'val is ',val if not val: channeltag = Tag(MyChannelList, "channel") channeltag['cname']=cname MyChannelList.channels.insert(0, channeltag) #print MyChannelList.prettify() with open(fileName, "wb") as filewriter: filewriter.write(str(MyChannelList))
def geturladdress(keywords, type, number, filename):
    """Scrape search-result titles and domains and write them to a file.

    :param keywords: query string passed through to getresponse()
    :param type: search type passed through to getresponse()
    :param number: requested result count as a string; fetched in pages of 100
    :param filename: output file receiving the title -> domain mapping
    """
    urltitle = {}
    # int() replaces the long-deprecated string.atoi(); same semantics here.
    pageid = int(number) / 100
    for idpage in range(0, pageid, 1):
        entirehtml = getresponse(keywords, type, idpage * 100)
        soup = BeautifulSOAP(entirehtml)
        results = soup.findAll('li', {'class': 'g'})
        for result in results:
            title_a = result.find('a')
            if not title_a:
                continue
            title = ''.join(title_a.findAll(text=True))
            title = html_unescape(title)
            url = title_a['href']
            url = getdomain(url)
            urltitle[title] = url
    writetofile(filename, urltitle)
def geturladdress(keywords, type, number, filename):
    """Scrape search-result titles and domains and write them to a file.

    :param keywords: query string passed through to getresponse()
    :param type: search type passed through to getresponse()
    :param number: requested result count as a string; fetched in pages of 100
    :param filename: output file receiving the title -> domain mapping
    """
    urltitle = {}
    # int() replaces the long-deprecated string.atoi(); same semantics here.
    pageid = int(number) / 100
    for idpage in range(0, pageid, 1):
        entirehtml = getresponse(keywords, type, idpage * 100)
        soup = BeautifulSOAP(entirehtml)
        results = soup.findAll('li', {'class': 'g'})
        for result in results:
            title_a = result.find('a')
            if not title_a:
                continue
            title = ''.join(title_a.findAll(text=True))
            title = html_unescape(title)
            url = title_a['href']
            url = getdomain(url)
            urltitle[title] = url
    writetofile(filename, urltitle)
def getChannelItems(name):
    """Add directory entries for a channel's subchannels and links for items.

    :param name: 'name' attribute of the <channel> element to load
    """
    # with-block closes the channel file (original leaked the handle).
    with open(file, 'rb') as response:
        link = response.read()
    soup = BeautifulSOAP(link, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    channel_list = soup.find('channel', attrs={'name': name})
    items = channel_list('item')
    for channel in channel_list('subchannel'):
        name = channel('name')[0].string
        thumb = channel('thumbnail')[0].string
        addDir(name, '', 3, thumb)
    for item in items:
        try:
            name = item('title')[0].string
        except:
            pass
        try:
            # Mirror preference: link[1] when mirror_link is on, link[2]
            # when mirror_link_low is also on; fall back to earlier links.
            if __settings__.getSetting('mirror_link') == "true":
                try:
                    url = item('link')[1].string
                except:
                    url = item('link')[0].string
                if __settings__.getSetting('mirror_link_low') == "true":
                    try:
                        url = item('link')[2].string
                    except:
                        try:
                            url = item('link')[1].string
                        except:
                            url = item('link')[0].string
            else:
                url = item('link')[0].string
        except:
            pass
        try:
            thumbnail = item('thumbnail')[0].string
        except:
            thumbnail = ''
        addLink(url, name, thumbnail)
def getChannelItems(name):
    """Add directory entries for a channel's subchannels and links for items.

    :param name: 'name' attribute of the <channel> element to load
    """
    # with-block closes the channel file (original leaked the handle).
    with open(file, 'rb') as response:
        link = response.read()
    soup = BeautifulSOAP(link, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    channel_list = soup.find('channel', attrs={'name': name})
    items = channel_list('item')
    for channel in channel_list('subchannel'):
        name = channel('name')[0].string
        thumb = channel('thumbnail')[0].string
        addDir(name, '', 3, thumb)
    for item in items:
        try:
            name = item('title')[0].string
        except:
            pass
        try:
            # Mirror preference: link[1] when mirror_link is on, link[2]
            # when mirror_link_low is also on; fall back to earlier links.
            if __settings__.getSetting('mirror_link') == "true":
                try:
                    url = item('link')[1].string
                except:
                    url = item('link')[0].string
                if __settings__.getSetting('mirror_link_low') == "true":
                    try:
                        url = item('link')[2].string
                    except:
                        try:
                            url = item('link')[1].string
                        except:
                            url = item('link')[0].string
            else:
                url = item('link')[0].string
        except:
            pass
        try:
            thumbnail = item('thumbnail')[0].string
        except:
            thumbnail = ''
        addLink(url, name, thumbnail)
def check_tor_status(self):
    """Fetch the Tor check page and report whether Tor appears active.

    Returns
    -------
    status: Bool
        The tor status
    """
    page = urllib2.urlopen(self.TOR_CHECK_ENDPOINT).read()
    soup = BeautifulSOAP(page)
    heading = soup.body.find('h1', attrs={'class': 'not'})
    # The page's <h1 class="not"> text states whether Tor is in use.
    return self.text_key in heading.text.lower()
def getSoup(url):
    """Return a BeautifulSOAP for an http URL, smb://, nfs://, or local file.

    :param url: http:// URL, smb:// or nfs:// share path, or local file path
    :return: parsed BeautifulSOAP, or None when the source does not exist
    """
    if url.startswith('http://'):
        data = makeRequest(url)
    else:
        if xbmcvfs.exists(url):
            if url.startswith("smb://") or url.startswith("nfs://"):
                # Network shares: copy to a local temp file so open() works.
                temp_path = os.path.join(profile, 'temp', 'sorce_temp.txt')
                copy = xbmcvfs.copy(url, temp_path)
                if copy:
                    # with-block closes the handle (original leaked it).
                    with open(temp_path, "r") as temp_file:
                        data = temp_file.read()
                    xbmcvfs.delete(temp_path)
                else:
                    addon_log("failed to copy from smb:")
            else:
                with open(url, 'r') as source_file:
                    data = source_file.read()
        else:
            addon_log("Soup Data not found!")
            return
    return BeautifulSOAP(data, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
class ExtractNewsInfo(object):
    """Wraps a BeautifulSOAP document and exposes news-page metadata."""

    def __init__(self):
        # Parsed document; None until parser() has been called.
        self.__soup = None

    def parser(self, html):
        """Parse `html` and keep the soup for the getters below.

        :param html: raw HTML text of the news page
        """
        # NOTE(review): clear() is not defined in this excerpt — presumably
        # provided elsewhere on the class (resets __soup); confirm.
        self.clear()
        self.__soup = BeautifulSOAP(html)

    def get_tilte(self):
        """Return the page <title> text; raise NoDataParser before parsing.

        NOTE: method name typo ("tilte") kept — callers use this spelling.
        """
        if self.__soup:
            return self.__soup.html.head.title.text
        # Message translates to "parse was not called".
        raise NoDataParser("没有调用解析")

    def __get_mata(self, name):
        """Return the content of <meta name=...>, or None when absent."""
        if self.__soup:
            __keywords = self.__soup.find('meta', attrs={'name': name})
            if __keywords:
                return __keywords['content']
        return None
def extractUrl(uid):
    """Resolve the playable (secure) stream URL for a TV5Monde+ Afrique video.

    :param uid: video/playlist identifier sent to the getPlaylist service
    :return: the secure stream URL, possibly wrapped as a mastertuga plugin URL
    """
    # Build and encrypt the service request body (NOTE: shadows builtin str).
    str = "operation=getPlaylist&uid=%s" % urllib.quote_plus(uid)
    str = encode(str, key)
    s = getUrl(
        "http://www.tv5mondeplusafrique.com/html/servicesV2/getPlaylist.xml?BulkLoaderNoCache=2_2&",
        post=str)
    s = decode(s, key)
    print "returned", repr(s.decode("unicode-escape"))
    from BeautifulSoup import BeautifulStoneSoup, BeautifulSoup, BeautifulSOAP
    xmlobj = BeautifulSOAP(s.decode("unicode-escape"),
                           convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    vurl = xmlobj("video")[0]
    su = vurl("secureurl")[0].string
    # NOTE(review): this character class strips the individual characters
    # [ ] C D A T anywhere in the string — presumably meant to strip a
    # "[CDATA[...]]" wrapper; confirm before changing.
    su = re.sub('[\[CDATA\]]', '', su)
    if 'manifest.f4m?' in su:
        # HDS manifests are handed off to the mastertuga plugin player.
        su = 'plugin://plugin.video.mastertuga/?url=' + urllib.quote_plus(su)
    return su
class ExtractNewsInfo(object):
    """Parse a news page and expose its title and meta information."""

    def __init__(self):
        self.__soup = None  # parsed document; None until parser() runs

    def parser(self, html):
        """Parse the raw HTML and remember the resulting soup."""
        self.clear()
        self.__soup = BeautifulSOAP(html)

    def get_tilte(self):
        """Return the page title; raises NoDataParser when nothing parsed."""
        if not self.__soup:
            # Message translates to "parse was not called".
            raise NoDataParser("没有调用解析")
        return self.__soup.html.head.title.text

    def __get_mata(self, name):
        """Return the content attribute of <meta name=...>, or None."""
        if not self.__soup:
            return None
        tag = self.__soup.find('meta', attrs={'name': name})
        return tag['content'] if tag else None

    def get_key_words(self):
        return self.__get_mata('keywords')

    def get_description(self):
        return self.__get_mata('description')

    def clear(self):
        """Drop any previously parsed soup."""
        if self.__soup:
            self.__soup = None
def download_subtitle(series_name, season, episode, output_dir):
    """Download English subtitles for one episode into its own directory.

    :param series_name: series name as known to the subtitle site
    :param season: season number (int)
    :param episode: episode number (int)
    :param output_dir: base directory; a per-episode subdirectory is created
    """
    dir_name = os.path.join(
        output_dir, "{}_s{:02}e{:02}".format(series_name, season, episode))
    # os.path.exists is the idiomatic existence check (was os.access F_OK).
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    # NOTE: chdir is a process-wide side effect; unzip below relies on it.
    os.chdir(dir_name)
    search_url = "{}{} s{:02}e{:02}".format(
        BASE_SEARCH, series_name, season, episode).replace(" ", "+")
    logging.info("Search url: {}".format(search_url))
    search_page_data = BeautifulSOAP(urllib2.urlopen(search_url).read())
    # TODO handle zero matches
    search_results_table = search_page_data.find(
        attrs={"class": "list first_column_title"})
    results = search_results_table.tbody.findAll("tr")
    # Keep only rows whose language cell is flagged as English subtitles.
    results = [
        x for x in results
        if x.findAll("td")[2].a.div["alt"] == "English subtitles"
    ]
    results = [x.findAll("td")[0].findAll("div")[1].a["href"] for x in results]
    for result in results:
        download_page_url = BASE_URL + "/he/" + result
        download_page_data = urllib2.urlopen(download_page_url).read()
        download_button = BeautifulSOAP(download_page_data).find(
            attrs={"class": "button big download"})
        download_url = BASE_URL + download_button.get("href")
        download_data = urllib2.urlopen(download_url).read()
        zip_filename = "output.zip"
        # Close the archive before unzipping (fixes leaked file handle).
        with open(zip_filename, "wb") as zip_file:
            zip_file.write(download_data)
        subprocess.call(["unzip", "-o", zip_filename])
        logging.info("Unzipped {}".format(zip_filename))
        os.unlink(zip_filename)
def parser(self, html):
    """Parse `html` with BeautifulSOAP and store the soup on the instance.

    :param html: raw HTML/XML text to parse
    """
    # Reset any previously parsed document before re-parsing.
    self.clear()
    self.__soup = BeautifulSOAP(html)
def getSoup(url):
    """Fetch `url` and return its body parsed as a BeautifulSOAP document."""
    page = makeRequest(url)
    soup = BeautifulSOAP(page, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    return soup
def PlayCommunityStream(channelId, name, mode):
    """Find stream sources for a channel and try them until one plays.

    :param channelId: community channel identifier
    :param name: display name passed to the stream processor
    :param mode: plugin mode (unused in this body)
    """
    try:
        xbmcplugin.endOfDirectory(int(sys.argv[1]))
        pDialog = xbmcgui.DialogProgress()
        ret = pDialog.create('XBMC', 'Finding available resources...')
        # Normalise the "playFirstChannel" setting to a real bool.
        playFirst = selfAddon.getSetting("playFirstChannel")
        if playFirst == None or playFirst == "" or playFirst == "false":
            playFirst = False
        else:
            playFirst = True
        playFirst = bool(playFirst)
        pDialog.update(20, 'Finding sources..')
        providers, default_source_exists = getSourceAndStreamInfo(channelId, playFirst, pDialog, sourceFilter)
        if default_source_exists:
            playFirst = True
        if len(providers) == 0:
            pDialog.close()
            time = 2000  # in miliseconds
            line1 = "No sources found"
            xbmc.executebuiltin('Notification(%s, %s, %d, %s)' % (__addonname__, line1, time, __icon__))
            return
        pDialog.update(30, 'Processing sources..')
        pDialog.close()
        enforceSourceSelection = False
        done_playing = False
        current_index = 0
        auto_skip = False
        auto_skip = True if selfAddon.getSetting("playOneByOne") == "true" else False
        # Retry loop: either ask the user to pick a source or walk the list.
        while not done_playing:
            ret = pDialog.create('XBMC', 'Trying to play the source')
            done_playing = True
            if (enforceSourceSelection or (len(providers) > 1 and not playFirst)) and not auto_skip:
                selectedprovider = selectSource(providers)
                if not selectedprovider:
                    return
            else:
                selectedprovider = providers[current_index]
                enforceSourceSelection = True
            # Each provider is a (source, stream-info, order) triple.
            (source, sInfo, order) = selectedprovider
            processor = source.findtext('processor')
            sourcename = source.findtext('sname')
            # Processor modules live in communityStreamPath; make importable.
            if communityStreamPath not in sys.path:
                sys.path.append(communityStreamPath)
            processorObject = import_module(processor.replace('.py', ''))
            pDialog.update(60, 'Trying to play..')
            pDialog.close()
            # Hand the stream info to the processor as a soup document.
            sinfoSoup = BeautifulSOAP(etree.tostring(sInfo),
                                      convertEntities=BeautifulStoneSoup.XML_ENTITIES)
            done_playing = processorObject.PlayStream(source, sinfoSoup, name, channelId)
            if not done_playing:
                time = 2000  # in miliseconds
                line1 = "Failed playing from " + sourcename
                xbmc.executebuiltin('Notification(%s, %s, %d, %s)' % (__addonname__, line1, time, __icon__))
                if auto_skip:
                    done_playing = False
                    current_index += 1
                    # NOTE(review): likely off-by-one — `>` allows
                    # current_index == len(providers), which would raise
                    # IndexError below; `>=` seems intended. Confirm.
                    if current_index > len(providers):
                        done_playing = True
                    if not done_playing:
                        (s, i, o) = providers[current_index]
                        titles = ''
                        # Build a human-readable "source: title" label.
                        if s.findtext('id') == "generic":
                            try:
                                titles = s.findtext('sname') + ': ' + i.find('item').findtext('title')
                            except:
                                titles = s.findtext('sname')
                        else:
                            try:
                                titles = s.findtext('sname') + ': ' + i.findtext('title')
                            except:
                                titles = s.findtext('sname')
                        ret = pDialog.create('XBMC', 'Trying to play the Item# %d of %d, Cancel in 3 seconds.\n Source:%s' % (current_index + 1, len(providers), titles))
                        xbmc.sleep(3000)
                        # Cancelling auto-skip falls back to manual selection.
                        if pDialog.iscanceled():
                            current_index = 0
                            done_playing = False
                            enforceSourceSelection = True
                            auto_skip = False
        return
    except:
        traceback.print_exc(file=sys.stdout)
def RefreshResources(auto=False, fNameOnly=None):
    """Download/refresh community resource files listed in Resources.xml.

    Uses ETag-based conditional requests so unchanged files are skipped.

    :param auto: True when triggered by the daily auto-load (dialog text only)
    :param fNameOnly: when set, refresh only the file with this name
    """
    pDialog = xbmcgui.DialogProgress()
    if auto:
        ret = pDialog.create('XBMC', 'Daily Auto loading Fetching resources...')
    else:
        ret = pDialog.create('XBMC', 'Fetching resources...')
    baseUrlForDownload = 'https://raw.githubusercontent.com/xamarcade/DaawoTV/master/plugin.video.DaawoTV/resources/community/'
    Fromurl = baseUrlForDownload + 'Resources.xml'
    req = urllib2.Request(Fromurl)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36')
    req.add_header('Referer', 'http://shahidaddon/')
    response = urllib2.urlopen(req)
    data = response.read()
    response.close()
    pDialog.update(20, 'Importing modules...')
    # Resources.xml lists one <file> element per downloadable resource.
    soup = BeautifulSOAP(data, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    resources = soup('file')
    fileno = 1
    totalFile = len(resources)
    import hashlib
    for rfile in resources:
        if pDialog.iscanceled():
            return
        progr = (fileno * 80) / totalFile
        fname = rfile['fname']
        if fNameOnly and not fname == fNameOnly:
            continue
        # Optional per-file attributes: explicit url and base64 flag.
        remoteUrl = None
        try:
            remoteUrl = rfile['url']
        except:
            pass
        isBase64 = False
        try:
            isBase64 = rfile['base64'] == "true"
        except:
            pass
        if remoteUrl:
            fileToDownload = remoteUrl
        else:
            fileToDownload = baseUrlForDownload + fname
        # The stored ETag is keyed by a hash of URL + addon version.
        fileHash = hashlib.md5(fileToDownload + addonversion).hexdigest()
        lastFileTime = selfAddon.getSetting("Etagid" + fileHash)
        if lastFileTime == "":
            lastFileTime = None
        resCode = 200
        eTag = None
        try:
            req = urllib2.Request(fileToDownload)
            req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36')
            req.add_header('Referer', 'http://shahidaddon/')
            # Conditional GET: 304 means the cached copy is still current.
            if lastFileTime:
                req.add_header('If-None-Match', lastFileTime)
            response = urllib2.urlopen(req)
            resCode = response.getcode()
            if resCode <> 304:
                try:
                    eTag = response.info().getheader('Etag')
                except:
                    pass
                data = response.read()
        except Exception as e:
            # urllib2 raises on 304 from some servers; detect it by message.
            s = str(e)
            if 'Not Modified'.lower() in s.lower():
                resCode = 304
                data = ''
        # Discard server error pages accidentally served as file content.
        if ('Exec format error: exec' in data or 'A file permissions error has occurred' in data) and 'xbmcplugin' not in data:
            data = ''
        if len(data) > 0:
            try:
                if isBase64:
                    import base64
                    data = base64.b64decode(data)
            except:
                print 'Failed..not base64.' + fname
                pDialog.update(20 + progr, 'Failed..not base64.' + fname)
                data = ''
        if len(data) > 0:
            with open(os.path.join(communityStreamPath, fname), "wb") as filewriter:
                filewriter.write(data)
            if eTag:
                selfAddon.setSetting(id="Etagid" + fileHash, value=eTag)
            pDialog.update(20 + progr, 'imported ...' + fname)
        elif resCode == 304:
            pDialog.update(20 + progr, 'No Change.. skipping.' + fname)
        else:
            pDialog.update(20 + progr, 'Failed..zero byte.' + fname)
        fileno += 1
    pDialog.close()
    dialog = xbmcgui.Dialog()
    ok = dialog.ok('XBMC', 'Download finished. Close Addon and come back')
def PopUpNotice():
    """Fetch the remote notice feed and show pop-up messages per frequency.

    Each <notify> element carries header/freq/message/date; `freq` is one of
    'every' (always show), 'daily' (once per day) or 'once' (once per date).
    Display history is tracked in the addon cache DB.
    """
    raw_notice = OpenURL(_Edit.PopUpNotice, timeout=500, cookiejar=BYB.cookie_name_create(_Edit.PopUpNotice))
    xml_notice = BeautifulSOAP(raw_notice, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    # Ensure the tracking table exists before any checks.
    BYB.headers_create(addon_cache, table='pop_message', headers='popupdate,displaydate')
    show = False
    if isinstance(xml_notice, BeautifulSOAP):
        if len(xml_notice('notify')) > 0:
            NotifingData = xml_notice('notify')
            for NotifyData in NotifingData:
                message = ''
                date = None
                header = ''
                freq = ''
                try:
                    header = NotifyData('header')[0].string
                    freq = NotifyData('freq')[0].string
                    message = NotifyData('message')[0].string
                    date = NotifyData('date')[0].string
                except:
                    pass
                # Was something already shown today / for this notice date?
                DisplayDateCheck = BYB.check_is_in_DB_table(
                    addon_cache, table='pop_message', row_header='displaydate',
                    check_item=datetime.datetime.today().strftime('%Y-%m-%d'))
                OnceDisplayCheck = BYB.check_is_in_DB_table(addon_cache, table='pop_message', row_header='popupdate', check_item=date)
                # Record this notice as seen today.
                BYB.write_to_DB(addon_cache, table='pop_message', headers='popupdate,displaydate', items=(date, datetime.datetime.today().strftime('%Y-%m-%d')))
                Dolog(datetime.datetime.today().strftime('%Y-%m-%d'), line_info=True)
                Dolog(BYB.check_is_in_DB_table(
                    addon_cache, table='pop_message', row_header='displaydate',
                    check_item=datetime.datetime.today().strftime('%Y-%m-%d')), line_info=True)
                if freq == 'every':
                    show = True
                elif freq == 'daily' and DisplayDateCheck == False:
                    Dolog('show daily', line_info=True)
                    show = True
                elif freq == 'once' and OnceDisplayCheck == False:
                    show = True
                else:
                    Dolog('key word for freq wrong either every,daily,once')
                if show == True:
                    if len(message) > 0:
                        try:
                            from libs import customgui
                            customgui.PopUpDialog(header=header, text=message)
                        except:
                            BYB.Notify(title='Error', message='Error in displaying Pop Up Message')
                    else:
                        BYB.Notify(title='Error', message='Error in displaying Pop Up Message')
                else:
                    pass
# 下载列表 +歌曲名 # BeautifulSoap # http://www.crummy.com/software/BeautifulSoup/bs3/documentation.zh.html import re,urllib,urllib2 from BeautifulSoup import BeautifulSOAP url= 'http://www.xiami.com/artist/top/id/1234' headers = {'User-Agent':"Mozilla/5.0 (Windows NT 5.1; rv:27.0) Gecko/20100101 Firefox/27.0"} req = urllib2.Request(url=url,headers=headers) content = urllib2.urlopen(req) soup = BeautifulSOAP(content,fromEncoding="gb18030") #print soup.originalEncoding #print soup.prettify() songlist = soup.findAll('a',{'href':re.compile(r'/song/(\d)+')}) #print dir(songlist[0]) for song in songlist: song_url='' song_url= 'www.xiami.com' + song.get('href') print song_url ,song.string #songlist = re.findall(pattern,string) #songlist = re.findall(pattern,content) #for song in songlist: # print song #
def getSoup(fileName, isabsolutePath=False):
    """Read a community-stream XML file and return it as a BeautifulSOAP.

    :param fileName: file name inside communityStreamPath, or a full path
    :param isabsolutePath: when True, treat fileName as an absolute path
    :return: BeautifulSOAP parsed from the file contents
    """
    strpath = os.path.join(communityStreamPath, fileName)
    if isabsolutePath:
        strpath = fileName
    # with-block closes the file deterministically (original leaked the handle).
    with open(strpath, "r") as source_file:
        data = source_file.read()
    # convertEntities deliberately not applied here (matches original).
    return BeautifulSOAP(data)
def __init__(self, html):
    """Parse `html` immediately and keep the resulting soup.

    :param html: raw HTML text to parse with BeautifulSOAP
    """
    # NOTE: the attribute is named `parser` but holds the parsed soup object.
    self.parser = BeautifulSOAP(html)