def GetRating(self, id):
    self.GetInfo(id)
    url = 'http://rating.kinopoisk.ru/' + str(id) + '.xml'
    Data = Get_url(url)
    if Data:
        xml = BeautifulStoneSoup(Data)
        try:
            kp = xml.find('kp_rating')
            r_kp = kp.string.encode('UTF-8')
            v_kp = kp['num_vote'].encode('UTF-8')
        except:
            r_kp = '-'
            v_kp = '-'
        try:
            imdb = xml.find('imdb_rating')
            r_imdb = imdb.string.encode('UTF-8')
            v_imdb = imdb['num_vote'].encode('UTF-8')
        except:
            r_imdb = '-'
            v_imdb = '-'
        return r_kp, v_kp, r_imdb, v_imdb
    else:
        return '-', '-', '-', '-'

def play():
    smilurl = common.args.url + '&format=SMIL'
    data = common.getURL(smilurl)
    tree = BeautifulStoneSoup(data, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    print tree.prettify()
    base = tree.find('meta')
    if base:
        base = base['base']
        if 'rtmp://' in base:
            playpath = tree.find('ref')['src']
            if '.mp4' in playpath:
                playpath = 'mp4:' + playpath
            else:
                playpath = playpath.replace('.flv', '')
            finalurl = base + ' playpath=' + playpath
        elif 'http://' in base:
            playpath = tree.find('ref')['src']
            finalurl = base + playpath
    else:
        finalurl = tree.find('ref')['src']
    item = xbmcgui.ListItem(path=finalurl)
    xbmcplugin.setResolvedUrl(pluginhandle, True, item)

def categoryWorker(thread_num, l):
    while (True and processCategories):
        department = category_queue.get()
        sys.stderr.write('Parsing categories for FY %s department %s, division %s at %s\n' % (
            department['fiscal_year'], department['department_name'],
            department['division_name'], department['division_detail_page_url']))
        my_mech = mechanize.Browser()
        category_request = my_mech.open(department['division_detail_page_url'])
        category_response = category_request.read()
        category_soup = BeautifulStoneSoup(category_response,
                                           convertEntities=BeautifulStoneSoup.ALL_ENTITIES)
        category_table = category_soup.find('table', id='grdAgency')
        category_page_dropdown = category_soup.find('select', id='MozillaPager1_ddlPageNumber')
        category_pages = []
        if category_page_dropdown:
            for category_page in category_page_dropdown.findAll('option'):
                category_pages.append(category_page['value'])
        else:
            sys.stderr.write("No page drop down on %s.\n" % department['division_detail_page_url'])
        department['categories'].extend(parseCategoryTable(category_response))
        for page in category_pages[1:]:
            sys.stderr.write(' ... Page %s from %s\n' % (page, department['division_detail_page_url']))
            my_mech.select_form('ctl02')
            my_mech.form.set_value([page], 'MozillaPager1$ddlPageNumber')
            category_page_request = my_mech.form.click('MozillaPager1$btnPageNumber')
            local_categories = parseCategoryTable(mechanize.urlopen(category_page_request).read())
            department['categories'].extend(local_categories)
        category_queue.task_done()

def extract_spectrum_info(filename):
    # Open the file
    with open(filename, 'r') as f:
        xml = f.read()
    # Extract the info
    soup = BeautifulStoneSoup(xml)
    info = {'id': soup.find('id').contents[0],
            'database-id': soup.find('database-id').contents[0],
            'inchi-key': soup.find('inchi-key').contents[0],
            'solvent': soup.find('solvent').contents[0],
            'sample-ph': soup.find('sample-ph').contents[0],
            'frequency': soup.find('frequency').contents[0]}
    # Enforce spectrum validity rules
    if (info['solvent'] == VALID_SOLVENT and
            float(info['sample-ph']) <= MAX_PH and
            float(info['sample-ph']) >= MIN_PH):
        # Get the inchi code
        try:
            info['inchi-code'] = \
                urllib2.urlopen(INCHI_RESOLVER_URL.format(info['inchi-key'])).read()
            time.sleep(TIME_BETWEEN_URL_REQUESTS)
            return info
        except:
            # Any exception, don't care which
            return None
    else:
        return None

def image_path_with_fgdc_to_world_file(image_path, world_file, srs, units="m"):
    image = Image.open(image_path)
    (width, height) = image.size
    xml_path = "%s.xml" % (os.path.splitext(image_path)[0])
    with open(xml_path, "r") as f:
        xml = BeautifulStoneSoup(f)
    north_bound = float(xml.find("northbc").text)
    west_bound = float(xml.find("westbc").text)
    south_bound = float(xml.find("southbc").text)
    east_bound = float(xml.find("eastbc").text)
    srs = "%s" % (srs)
    if not srs.startswith("EPSG:"):
        srs = "EPSG:%s" % (srs)
    (west_bound, north_bound) = latlng_to_srs(north_bound, west_bound, srs, units)
    (east_bound, south_bound) = latlng_to_srs(south_bound, east_bound, srs, units)
    x_pixel_width = (east_bound - west_bound) / width
    y_pixel_width = (south_bound - north_bound) / height
    for l in [x_pixel_width, 0, 0, y_pixel_width, west_bound, north_bound]:
        world_file.write("%s\n" % l)
    return world_file

def categoryDetailWorker(thread_num, l):
    while (True and processCategoryDetails):
        category = category_details_queue.get()
        sys.stderr.write('Parsing category details for %s at %s\n' % (
            category['name'], category['detail_url']))
        my_mech = mechanize.Browser()
        category_detail_request = my_mech.open(category['detail_url'])
        category_detail_response = category_detail_request.read()
        category_detail_soup = BeautifulStoneSoup(category_detail_response,
                                                  convertEntities=BeautifulStoneSoup.ALL_ENTITIES)
        category_detail_table = category_detail_soup.find('table', id='grdCategories')
        category_detail_page_dropdown = category_detail_soup.find('select',
                                                                  id='MozillaPager1_ddlPageNumber')
        category_detail_pages = []
        if category_detail_page_dropdown:
            for category_detail_page in category_detail_page_dropdown.findAll('option'):
                category_detail_pages.append(category_detail_page['value'])
        else:
            sys.stderr.write("No page drop down on %s.\n" % category['detail_url'])
        category_details = parseCategoryDetailTable(category_detail_response)
        l.acquire()
        category['details'].extend(category_details)
        l.release()
        for page in category_detail_pages[1:]:
            sys.stderr.write(' ... Page %s from %s\n' % (page, category['detail_url']))
            my_mech.select_form('ctl02')
            my_mech.form.set_value([page], 'MozillaPager1$ddlPageNumber')
            category_detail_page_request = my_mech.form.click('MozillaPager1$btnPageNumber')
            category_details = parseCategoryDetailTable(
                mechanize.urlopen(category_detail_page_request).read())
            l.acquire()
            category['details'].extend(category_details)
            l.release()
        category_details_queue.task_done()

def get_weather(location):
    degree = '°'.decode('utf8')
    conditions = {}
    base_url = 'http://api.wunderground.com/auto/wui/geo/WXCurrentObXML/index.xml?query='
    try:
        # urllib.urlopen: this snippet is Python 2 throughout
        # (str.decode above only exists in Python 2).
        page = urllib.urlopen(base_url + location)
    except:
        return 'Could not open the page!'
    else:
        soup = BeautifulStoneSoup(page)
        conditions['location'] = soup.find('full').contents[0]
        if 2 >= len(conditions['location']):
            return 'Inexistent location: ' + location
        else:
            conditions['weather'] = soup.find('weather').contents[0]
            conditions['temp'] = soup.find('temperature_string').contents[0]
            pos = conditions['temp'].find(' ')
            conditions['temp'] = conditions['temp'][:pos] + degree + \
                conditions['temp'][pos:]
            pos = conditions['temp'].rfind(' ')
            conditions['temp'] = conditions['temp'][:pos] + degree + \
                conditions['temp'][pos:]
    page.close()
    return conditions

def getXmlCursor(xml):
    """Parse the XML and extract the current query's start index,
    items per page, and total result count."""
    soup = BeautifulStoneSoup(xml)
    start = int(soup.find('opensearch:startindex').string)
    count = int(soup.find('opensearch:itemsperpage').string)
    totalCount = int(soup.find('opensearch:totalresults').string)
    return (start, count, totalCount)

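A minimal usage sketch for getXmlCursor; the OpenSearch-style fragment and its values below are illustrative, not from the source. BeautifulStoneSoup is built on SGMLParser, which lowercases tag names during parsing, so the camel-cased feed tags are matched by their lowercase forms.

sample = """<feed>
  <openSearch:startIndex>1</openSearch:startIndex>
  <openSearch:itemsPerPage>20</openSearch:itemsPerPage>
  <openSearch:totalResults>97</openSearch:totalResults>
</feed>"""
# Tag names are lowercased by the parser, which is why the function
# searches for 'opensearch:startindex' and friends.
start, count, totalCount = getXmlCursor(sample)  # (1, 20, 97)
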
def play(url=common.args.url, playrtmp=True):
    # GET DETAILS FROM API
    #url = build_api_url('details','',ID=id,ios=True)
    #data = common.getURL(url)
    #if data:
    #    for mediaUrl in demjson.decode(data)['MediaURLs']:
    #        if mediaUrl['type'] == '480p_1mbps.mp4':
    #            finalurl=mediaUrl['path']
    # GET ID FROM HTTP PAGE
    #data = common.getURL(url)
    #id,paremeters=re.compile("StartPlayer \((.+?), '(.+?)',").findall(data)[0]
    # Get file path
    vidwall = 'http://www.crackle.com/app/vidwall.ashx?flags=-1&fm=%s&partner=20' % url
    data = common.getURL(vidwall)
    tree = BeautifulStoneSoup(data, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    filepath = tree.find('i')['p']
    if playrtmp:
        # RTMP STREAMING
        rtmpbase = tree.find('channel')['strrtmpcdnurl']
        swfUrl = 'http://www.crackle.com/flash/ReferrerRedirect.ashx'
        finalurl = 'rtmp://' + rtmpbase + '/' + filepath + '480p_1mbps.mp4' + ' swfurl=' + swfUrl + " swfvfy=true"
    else:
        # HTTP STREAMING
        finalurl = 'http://media-us-am.crackle.com/' + filepath + '480p_1mbps.mp4'
    item = xbmcgui.ListItem(path=finalurl)
    return xbmcplugin.setResolvedUrl(pluginhandle, True, item)

def parse_config(soup):
    """There are lots of goodies in the config we get back from the ABC.
    In particular, it gives us the URLs of all the other XML data we need.
    """
    soup = soup.replace('&amp;', '&')
    xml = BeautifulStoneSoup(soup)

    # should look like "rtmp://cp53909.edgefcs.net/ondemand"
    # Looks like the ABC don't always include this field.
    # If not included, that's okay -- ABC usually gives us the server in the auth result as well.
    rtmp_url = xml.find('param', attrs={'name': 'server_streaming'}).get('value')
    rtmp_chunks = rtmp_url.split('/')

    return {
        'rtmp_url': rtmp_url,
        'rtmp_host': rtmp_chunks[2],
        'rtmp_app': rtmp_chunks[3],
        'api_url': xml.find('param', attrs={'name': 'api'}).get('value'),
        'categories_url': xml.find('param', attrs={'name': 'categories'}).get('value'),
    }

def geocode(address="", city="", state="CA"):
    address = urllib.quote(address.encode('utf-8'))
    g_url = 'http://local.yahooapis.com/MapsService/V1/geocode?appid='
    g_url += '0MoPk9DV34FH0rumXB_xENjSlf.jdG4woRO9nFqyUcM86nLsFSynUvAwZZo6g--'
    g_url += '&street=%s&city=%s&state=%s' % (address, city, state)
    url = urllib.urlopen(g_url)
    dom = BeautifulStoneSoup(url)
    url.close()
    coords = {
        'address': None,
        'latitude': None,
        'longitude': None,
    }
    result_attr = dom.find('result')
    if result_attr and result_attr['precision'] == 'address':
        dom_fields = ['address', 'latitude', 'longitude']
        for field in dom_fields:
            i = dom.find(field)
            if i:
                if field == 'address':
                    coords[field] = i.string
                else:
                    try:
                        coords[field] = float(i.string)
                    except:
                        pass
    return coords

def cablelogin(selected):
    if os.path.isfile(COOKIEFILE):
        if addon.getSetting("clearcookies") == 'true':
            os.remove(COOKIEFILE)
        else:
            return
    data = getURL('http://www.epixhd.com/epx/ajax/chooseMSO/?mso_id=' + selected)
    jsondata = demjson.decode(data)
    tree = BeautifulStoneSoup(jsondata['content'],
                              convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    try:
        signinUrl = tree.find('iframe')['src']
        provider = tree.find('iframe')['class']
    except:
        signinUrl = re.compile(
            '<script language="javascript">self.parent.location="(.*?)";'
        ).findall(jsondata['content'])[0]
        provider = 'cox'
    br = mechanize.Browser()
    br.set_handle_robots(False)
    br.set_cookiejar(cj)
    br.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.17) Gecko/20110422 Ubuntu/10.10 (maverick) Firefox/3.6.17'
    )]
    sign_in = br.open(signinUrl)
    if provider == 'charter':
        br.select_form(name="f")
        br["username"] = addon.getSetting("login_name")
        br["password"] = addon.getSetting("login_pass")
        br["zipcode"] = addon.getSetting("zipcode")
    elif provider == 'cox':
        br.select_form(name="LoginPage")
        br["username"] = addon.getSetting("login_name")
        br["password"] = addon.getSetting("login_pass")
    elif provider == 'dish':
        br.select_form(name="f")
        br["username"] = addon.getSetting("login_name")
        br["password"] = addon.getSetting("login_pass")
    elif provider == 'mediacom':
        br.select_form(name="f")
        br["username"] = addon.getSetting("login_name")
        br["password"] = addon.getSetting("login_pass")
    elif provider == 'suddenlink':
        br.select_form(name="f")
        br["username"] = addon.getSetting("login_name")
        br["password"] = addon.getSetting("login_pass")
    elif provider == 'verizon':
        br.select_form(name="loginpage")
        br["IDToken1"] = addon.getSetting("login_name")
        br["IDToken2"] = addon.getSetting("login_pass")
    br.submit()
    br.select_form(nr=0)
    response = br.submit()
    data = response.read()
    redirect = 'http://www.epixhd.com' + re.compile(
        'self.parent.location="(.*?)"').findall(data)[0]
    print getURL(redirect)
    cj.save(COOKIEFILE, ignore_discard=True, ignore_expires=True)

def parse_config(soup):
    """There are lots of goodies in the config we get back from the ABC.
    In particular, it gives us the URLs of all the other XML data we need.
    """
    soup = soup.replace('&amp;', '&')
    xml = BeautifulStoneSoup(soup)

    # should look like "rtmp://cp53909.edgefcs.net/ondemand"
    # Looks like the ABC don't always include this field.
    # If not included, that's okay -- ABC usually gives us the server in the auth result as well.
    rtmp_url = xml.find('param', attrs={'name': 'server_streaming'}).get('value')
    rtmp_chunks = rtmp_url.split('/')

    return {
        'rtmp_url': rtmp_url,
        'rtmp_host': rtmp_chunks[2],
        'rtmp_app': rtmp_chunks[3],
        'auth_url': xml.find('param', attrs={'name': 'auth'}).get('value'),
        'api_url': xml.find('param', attrs={'name': 'api'}).get('value'),
        'categories_url': xml.find('param', attrs={'name': 'categories'}).get('value'),
        'captions_url': xml.find('param', attrs={'name': 'captions'}).get('value'),
    }

def xml_to_dict(self, data):
    from BeautifulSoup import BeautifulStoneSoup as BS
    soup = BS(data)
    username = soup.find('db:uid').contents[0]
    uid = soup.find('id').contents[0].split('/')[-1]
    title = soup.find('title').contents[0]
    return {'id': uid, 'username': username, 'title': title}

def handle(self, *args, **kwargs):
    resource = urlopen(TRAIL_REPORT_URL)
    soup = BeautifulStoneSoup(resource)
    lift = soup.find("lifts")
    cache.set(TRAIL_REPORT_CACHE_KEY, {
        "total": lift.get("total"),
        "open": lift.get("totalopen"),
    }, 7 * 24 * 60 * 60)

    resource = urlopen(WEATHER_REPORT_URL)
    soup = BeautifulStoneSoup(resource)
    report = soup.findAll("report")[1]
    forecast = []
    weather_data = {
        "temperature": report.get("temp"),
        "surface": report.get("surfaceconditions"),
    }
    for i in range(1, 5):
        day = soup.find("day%d" % i)
        if day:
            forecast.append({
                "day": day.get("day"),
                "status": WEATHER_TYPES[int(day.get("icon"))],
            })
    weather_data["forecast"] = forecast
    cache.set(WEATHER_REPORT_CACHE_KEY, weather_data, 7 * 24 * 60 * 60)

def QueueStatus(cls, api_key, _hash):
    template_queue = \
"""
<?xml version="1.0" encoding="utf-8" ?>
<queue>
    <apiKey>%s</apiKey>
    <hash>%s</hash>
</queue>
""" % (api_key, _hash)
    site = "http://api.online-convert.com/queue-status"
    d = dict()
    d["queue"] = template_queue
    params = urllib.urlencode(d)
    conn = urllib.urlopen(site, params)
    data = conn.read()
    # d = dict()
    soup = BeautifulStoneSoup(data)
    try:
        d["code"] = soup.find("code").contents[0]
    except:
        d["code"] = -1
    try:
        d["download_counter"] = soup.find("downloadcounter").contents[0]
    except:
        d["download_counter"] = -1
    try:
        d["date_processed"] = soup.find("dateprocessed").contents[0]
    except:
        d["date_processed"] = -1
    try:
        d["direct_download"] = soup.find("directdownload").contents[0]
    except:
        d["direct_download"] = -1
    try:
        d["source_checksum"] = soup.find("source_checksum").contents[0]
    except:
        d["source_checksum"] = -1
    try:
        d["checksum"] = soup.find("checksum").contents[0]
    except:
        d["checksum"] = -1
    try:
        d["target_size"] = soup.find("target_size").contents[0]  # in KB
    except:
        d["target_size"] = -1
    try:
        d["target_type"] = soup.find("convert_to").contents[0]
    except:
        d["target_type"] = -1
    try:
        d["mime_type"] = soup.find("mime_type").contents[0]
    except:
        d["mime_type"] = -1
    try:
        d["hash"] = soup.find("hash").contents[0]
    except:
        d["hash"] = -1
    return d

def updateStatusPriority(self):
    url = r"http://code.google.com/feeds/issues/p/ankidroid/issues/full?id=" + str(
        self.issueName)
    updated = False
    try:
        result = fetch(url)
        if result.status_code == 200:
            soup = BeautifulStoneSoup(result.content)
            status = soup.find('issues:status')
            if status:
                self.status = unicode(status.string)
                updated = True
                logging.debug("Setting status to '" + self.status + "'")
            priority = soup.find(name='issues:label',
                                 text=re.compile(r"^Priority-.+$"))
            if priority:
                self.priority = re.search("^Priority-(.+)$",
                                          unicode(priority.string)).group(1)
                updated = True
                logging.debug("Setting priority to '" + self.priority + "'")
    except Error, e:
        logging.error("Error while retrieving status and priority: %s" % str(e))

def episodes(url):
    print "Episodes: " + url
    req = urllib2.Request(url)
    req.add_header('User-Agent', userAgent)
    response = urllib2.urlopen(req)
    link = response.read()
    soup = BeautifulStoneSoup(link)
    response.close()
    episodes = re.compile('<a href="(.+?)">\s*<img src="(.+?)".*>\s*<h3>(.+?)</h3>').findall(link)
    count = len(episodes)
    prevPage = soup.find('a', text="<")
    if (prevPage != None):
        count = count + 1
        addDir("<< Previous Page", prevPage.parent['href'], 1, '', count)
    for href, thumb, name in episodes:
        addLink(name, site + href, site + thumb, count)
    nextPage = soup.find('a', text=">")
    if (nextPage != None):
        addDir("Next Page >>", nextPage.parent['href'], 1, '', count)

def parse_config(file_to_read):
    parsed = BeautifulStoneSoup(open(file_to_read).read())
    adapters = parsed.findAll('adapter')
    if (not adapters):
        adapters = parsed.findAll('interface')
    host_tag = parsed.find('hostname')
    if host_tag:
        host_name = host_tag.string.lower()
    else:
        host_name = None
    domain_tag = parsed.find('domainname')
    if domain_tag:
        domain_name = domain_tag.string
        if domain_name:
            domain_name = domain_name.lower()
    else:
        domain_name = None
    ip_list = []
    for adapter in adapters:
        mac = (adapter.find('address').string if adapter.find('address') else None)
        if mac:
            mac = mac.replace('-', ':').lower()
        adapter_ips = adapter.findAll('adapterip')
        for adapter_ip_node in adapter_ips:
            if (not adapter_ip_node):
                continue
            ip = ''
            for ip_address in adapter_ip_node.find('ip'):
                ip = ip_address.string.strip()
                if (not ip):
                    continue
                info = {'host_name': host_name,
                        'domain_name': domain_name,
                        'ip_address': ip,
                        'mac_address': mac}
                if ((info not in ip_list) and (ip != '127.0.0.1') and (':' not in ip)):
                    ip_list.append(info)
    return ip_list

def process_save_order_response(response, ad_rep_order, order_dict):
    """ Validate response from save order soap call. If success, a firestorm
    order id will be saved to the ad_rep order.
    """
    LOG.debug('SaveOrder Response: %s' % response)
    soup = BeautifulStoneSoup(response)
    LOG.debug(soup.prettify())
    if soup.find('status').string == 'SUCCESS':
        firestorm_order_id = soup.find('id').string
        LOG.debug("Success: Firestorm Order ID: %s" % firestorm_order_id)
        try:
            ad_rep_order = AdRepOrder.objects.get(order=ad_rep_order.order)
            ad_rep_order.firestorm_order_id = int(firestorm_order_id)
            ad_rep_order.save()
        except ValueError:
            LOG.error("Invalid Firestorm Order ID: %s" % firestorm_order_id)
    else:
        LOG.error('AdRepOrder %s order_dict: %s' % (ad_rep_order.order, order_dict))
        errors = ''
        for error_msg in soup.findAll('errormsg'):
            LOG.error(error_msg.string)
            errors += error_msg.string + '. '
        error_message = 'SaveOrder: %s error: %s' % (ad_rep_order.order, errors)
        LOG.error(error_message)
        raise ValidationError(error_message)

def vendorWorker(thread_num, l):
    while (True and processVendors):
        detail = vendors_queue.get()
        sys.stderr.write('Parsing vendors for category detail for %s at %s\n' % (
            detail['detail_description'], detail['vendor_url']))
        my_mech = mechanize.Browser()
        vendor_request = my_mech.open(detail['vendor_url'])
        vendor_response = vendor_request.read()
        vendor_soup = BeautifulStoneSoup(vendor_response,
                                         convertEntities=BeautifulStoneSoup.ALL_ENTITIES)
        vendor_table = vendor_soup.find('table', id='grdAgency')
        vendor_page_dropdown = vendor_soup.find('select', id='MozillaPager1_ddlPageNumber')
        vendor_pages = []
        if vendor_page_dropdown:
            for vendor_page in vendor_page_dropdown.findAll('option'):
                vendor_pages.append(vendor_page['value'])
        else:
            sys.stderr.write("No page drop down on %s.\n" % detail['vendor_url'])
        vendors = parseVendorTable(vendor_response)
        l.acquire()
        detail['vendors'].extend(vendors)
        l.release()
        for page in vendor_pages[1:]:
            sys.stderr.write(' ... Page %s from %s\n' % (page, detail['vendor_url']))
            my_mech.select_form('ctl02')
            my_mech.form.set_value([page], 'MozillaPager1$ddlPageNumber')
            vendor_page_request = my_mech.form.click('MozillaPager1$btnPageNumber')
            vendors = parseVendorTable(mechanize.urlopen(vendor_page_request).read())
            l.acquire()
            detail['vendors'].extend(vendors)
            l.release()
        vendors_queue.task_done()

def tv_db_id_lookup(seriesid, seriesname):
    tv_api_key = '03B8C17597ECBD64'
    mirror = 'http://thetvdb.com'
    banners = 'http://thetvdb.com/banners/'
    if seriesid:
        series_xml = mirror + ('/api/%s/series/%s/en.xml' % (tv_api_key, seriesid))
        series_xml = common.getURL(series_xml)
        tree = BeautifulStoneSoup(series_xml,
                                  convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
        try:
            genre = tree.find('genre').string
            genre = genre.replace("|", ",")
            genre = genre.strip(",")
        except:
            print '%s - Genre Failed' % seriesname
            genre = None
        try:
            aired = tree.find('firstaired').string
        except:
            print '%s - Air Date Failed' % seriesname
            aired = None
        try:
            banner = banners + tree.find('banner').string
        except:
            print '%s - Banner Failed' % seriesname
            banner = None
        try:
            fanart = banners + tree.find('fanart').string
        except:
            print '%s - Fanart Failed' % seriesname
            fanart = None
        try:
            poster = banners + tree.find('poster').string
        except:
            print '%s - Poster Failed' % seriesname
            poster = None
        return banner, poster, fanart, genre, aired, seriesid
    else:
        return None, None, None, None, None, None

def parse(cls_, file_handle, fail_fast=True):
    '''
    parse is the main entry point for an OfxParser. It takes a file
    handle and an optional log_errors flag.

    If fail_fast is True, the parser will fail on any errors.
    If fail_fast is False, the parser will log poor statements in the
    statement class and continue to run. Note: the library does not
    guarantee that no exceptions will be raised to the caller, only
    that statements will include bad transactions (which are marked).
    '''
    cls_.fail_fast = fail_fast

    if isinstance(file_handle, type('')):
        raise RuntimeError(u"parse() takes in a file handle, not a string")

    ofx_obj = Ofx()

    # Store the headers
    ofx_file = OfxFile(file_handle)
    ofx_obj.headers = ofx_file.headers
    ofx_obj.accounts = []

    ofx = BeautifulStoneSoup(ofx_file.fh)
    if len(ofx.contents) == 0:
        raise OfxParserException('The ofx file is empty!')

    stmtrs_ofx = ofx.findAll('stmtrs')
    if stmtrs_ofx:
        ofx_obj.accounts += cls_.parseStmtrs(stmtrs_ofx, AccountType.Bank)

    ccstmtrs_ofx = ofx.findAll('ccstmtrs')
    if ccstmtrs_ofx:
        ofx_obj.accounts += cls_.parseStmtrs(ccstmtrs_ofx, AccountType.CreditCard)

    invstmtrs_ofx = ofx.findAll('invstmtrs')
    if invstmtrs_ofx:
        ofx_obj.accounts += cls_.parseInvstmtrs(invstmtrs_ofx)

    seclist_ofx = ofx.find('seclist')
    if seclist_ofx:
        ofx_obj.security_list = cls_.parseSeclist(seclist_ofx)
    else:
        ofx_obj.security_list = None

    acctinfors_ofx = ofx.find('acctinfors')
    if acctinfors_ofx:
        ofx_obj.accounts += cls_.parseAcctinfors(acctinfors_ofx, ofx)

    fi_ofx = ofx.find('fi')
    if fi_ofx:
        for account in ofx_obj.accounts:
            account.institution = cls_.parseOrg(fi_ofx)

    if ofx_obj.accounts:
        ofx_obj.account = ofx_obj.accounts[0]

    return ofx_obj

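A minimal call sketch for the parser above, assuming parse is exposed as a classmethod on an OfxParser class (as the docstring suggests); the class name and the statement.ofx path are assumptions for illustration.

# Hypothetical usage; 'OfxParser' and 'statement.ofx' are assumed names.
with open('statement.ofx', 'rb') as fh:
    ofx = OfxParser.parse(fh, fail_fast=False)  # keep going past bad statements
print len(ofx.accounts)  # bank, credit-card and investment accounts combined
print ofx.headers        # header dict captured by OfxFile
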
def extract_tokens(self, html):
    soup = BeautifulStoneSoup(
        html, convertEntities=BeautifulStoneSoup.ALL_ENTITIES)
    title = soup.find('title').text
    self.title = title
    body = soup.find('body')
    tokens = self.find_tokens(body)
    self.tokens = tokens

def getParams():
    query = scrape(queryPage)
    querySoup = BeautifulStoneSoup(query,
                                   convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    flatTypes = [(tag['value'], tag.contents[0])
                 for tag in querySoup.find('select', {'name': 'FLAT_TYPE'}).findAll('option')
                 if len(tag['value']) > 0]
    towns = [tag['value']
             for tag in querySoup.find('select', {'name': 'NME_NEWTOWN'}).findAll('option')
             if len(tag['value']) > 0]
    dates = [tag['value']
             for tag in querySoup.find('select', {'name': 'DTE_APPROVAL_FROM'}).findAll('option')
             if len(tag['value']) > 0]
    return (dict(flatTypes), towns, dates)

def getVideoURL(self, smilUrl):
    data = urllib2.urlopen(smilUrl).read()
    htmlTree = BeautifulStoneSoup(data,
                                  convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    base = htmlTree.find('meta')['base']
    filepath = htmlTree.find('video')['src']
    url = base + " playpath=" + filepath
    return url

def update(self):
    usock = urllib.urlopen(self.main_url + self.station_url + str(self.number))
    xml_data = usock.read()
    usock.close()
    soup = BeautifulStoneSoup(xml_data)
    self.bikes = int(soup.find('bikes').contents[0])
    self.free = int(soup.find('attachs').contents[0])
    self.timestamp = datetime.now()
    return self

def play(url=common.args.url):
    print "DEBUG Entering play function"
    swfUrl = 'http://can.cbs.com/thunder/player/chrome/canplayer.swf'
    if 'http://' in url:
        data = common.getURL(url)
        try:
            pid = re.compile('var pid = "(.*?)";').findall(data)[0]
        except:
            pid = re.compile("var pid = '(.*?)';").findall(data)[0]
    else:
        pid = url
    # OLD URL
    #url = "http://release.theplatform.com/content.select?format=SMIL&Tracking=true&balance=true&MBR=true&pid=" + pid
    url = "http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&Tracking=true&mbr=true" % pid
    if (common.settings['enableproxy'] == 'true'):
        proxy = True
    else:
        proxy = False
    data = common.getURL(url, proxy=proxy)
    tree = BeautifulStoneSoup(data, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    if (common.settings['enablesubtitles'] == 'true'):
        closedcaption = tree.find('param', attrs={'name': 'ClosedCaptionURL'})
        if (closedcaption is not None):
            xml_closedcaption = common.getURL(closedcaption['value'])
            convert_subtitles(xml_closedcaption, pid)
    rtmpbase = tree.find('meta')
    if rtmpbase:
        rtmpbase = rtmpbase['base']
    items = tree.find('switch').findAll('video')
    hbitrate = -1
    sbitrate = int(common.settings['quality']) * 1024
    for item in items:
        bitrate = int(item['system-bitrate'])
        if bitrate > hbitrate and bitrate <= sbitrate:
            hbitrate = bitrate
            playpath = item['src']
            if '.mp4' in playpath:
                playpath = 'mp4:' + playpath
            else:
                playpath = playpath.replace('.flv', '')
            finalurl = rtmpbase + ' playpath=' + playpath + " swfurl=" + swfUrl + " swfvfy=true"
    item = xbmcgui.ListItem(path=finalurl)
    xbmcplugin.setResolvedUrl(pluginhandle, True, item)
    if (common.settings['enablesubtitles'] == 'true') and (closedcaption is not None):
        while not xbmc.Player().isPlaying():
            print 'CBS--> Not Playing'
            xbmc.sleep(100)
        subtitles = os.path.join(common.pluginpath, 'resources', 'cache', pid + '.srt')
        print "CBS --> Setting subtitles"
        xbmc.Player().setSubtitles(subtitles)

def main():
    """ """
    # process options
    usage = "Usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option("-d", "--dry-run", dest="dry_run",
                      help="don't actually write the output file",
                      action="store_true", default=False)
    (options, args) = parser.parse_args()

    # parse the xml file with beautifulsoup
    with open(INPUT_PATH) as fh:
        soup = BeautifulStoneSoup(fh.read(), selfClosingTags=['contact'])

    # purge read-only (i.e. LDAP) users from the XML soup
    for user in soup.findAll('user', attrs={'read-only': 'true'}):
        user.extract()

    # connect to ldap, get list of users
    con = bind(USERNAME, PASSWORD, HOST, DOMAIN)
    if globals().has_key('MEMBER_GROUPS'):
        memberships = []
        for group in MEMBER_GROUPS:
            memberships.append("(memberOf=cn=%s,%s)" % (group, BASE_DN))
        users = search(con, BASE_DN, searchFilter=u'(|%s)' % "".join(memberships))
    else:
        users = search(con, BASE_DN)

    # add user tag for each user in list to the XML soup
    for user in users:
        dn, attrs = user
        add_user_tag(soup, attrs)

    # update timestamp
    soup.find('created').string.replaceWith(
        time.strftime("%A, %d %B %Y %H:%M:%S o'clock %Z"))

    # BeautifulSoup / SGMLParser will lowercase 'serviceProvider' because all attributes in XML
    # should be lowercase. But OpenNMS is case-sensitive, so we force it back to being broken
    out = re.sub("serviceprovider", "serviceProvider", soup.prettify())

    # write file or show what we would've written
    if not options.dry_run:
        with open(OUTPUT_PATH, 'w') as fh:
            fh.write(out)
    else:
        print "Generated:\n"
        print out
    return 0

def get(self, agency, line, direction):
    directions = get_xml('http://webservices.nextbus.com/service/publicXMLFeed?command=routeConfig&a='
                         + agency + '&r=' + line)
    soup = BeautifulStoneSoup(directions, selfClosingTags=['stop'])
    stop_ids = soup.find('direction', tag=direction).findAll('stop')
    html = '<?xml version="1.0" encoding="utf-8" ?><body>'
    for stop_id in stop_ids:
        stop = soup.find('stop', tag=stop_id['tag'])
        html += '<choice tag="' + stop['tag'] + '" title="' + stop['title'].replace("&", "and") + '">'
    html += '</body>'
    self.response.out.write(html)

def play(url=common.args.url):
    videoname = url.split('/')[-2]
    smil = 'http://video.nationalgeographic.com/video/player/data/xml/%s.smil' % videoname
    data = common.getURL(smil)
    tree = BeautifulStoneSoup(data, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    base = tree.find('meta', attrs={'name': 'httpBase'})['content']
    filepath = tree.find('video')['src']
    final = base + filepath + '?v=1.2.17&fp=MAC%2011,1,102,62' + '&r=' + randomstring(5) + '&g=' + randomstring(12)
    item = xbmcgui.ListItem(path=final)
    xbmcplugin.setResolvedUrl(pluginhandle, True, item)

def _getCourseListing(self):
    xml = urllib2.urlopen(ebRss)
    soup = BeautifulStoneSoup(xml)
    tags = soup.findAll('link')
    eids = []
    courses = {}
    global venue
    for tag in tags:
        match = re.search(r"(event/)(\d+)(/rss)", str(tag))
        if match:
            print "Found EventBrite ID %s : %s" % (match.group(2), str(tag))
            eids.append(match.group(2))
    for eid in eids:
        print "Querying EventBrite API for %s" % (eid)
        xml = urllib2.urlopen('https://www.eventbrite.com/xml/event_get?app_key=%s&id=%s'
                              % (appkey, eid))
        soup = BeautifulStoneSoup(xml)
        startdate = self._fixText(soup.find('start_date'))
        enddate = self._fixText(soup.find('end_date'))
        title = self._fixText(soup.find('title'))
        #desc = self._fixText(soup.find('description'))
        if not venue:
            venueXML = soup.find('venue')
            name = str(venueXML.find('name'))
            address = str(venueXML.find('address'))
            address2 = str(venueXML.find('address_2'))
            city = str(venueXML.find('city'))
            region = str(venueXML.find('region'))
            zip = str(venueXML.find('postal_code'))
            list = [name, address, address2, city, region]
            venue = self._fixText(", ".join(list) + " " + zip)
            print "Setting Venue: " + venue
        urls = soup.findAll('url')
        url = ""
        for addr in urls:
            m = re.search(r"\d+", str(addr))
            if m:
                url = self._fixText(addr)
        startdate = time.gmtime(time.mktime(time.strptime(startdate, "%Y-%m-%d %H:%M:%S")))
        enddate = time.gmtime(time.mktime(time.strptime(enddate, "%Y-%m-%d %H:%M:%S")))
        desc = '<a href="%s">Click Here</a> for more info.' % (url)
        thisCourse = {'title': title, 'desc': desc, 'startdate': startdate,
                      'enddate': enddate, 'url': url}
        courses[eid] = thisCourse
    return courses

def login(self):
    """ Read greeting """
    greeting = self.read()
    soup = BeautifulStoneSoup(greeting)
    svid = soup.find('svid')
    version = soup.find('version')
    print("Connected to %s (v%s)\n" % (svid.text, version.text))

    """ Login """
    xml = commands.login % self.config
    if not self.cmd(xml, silent=True):
        exit(1)

def read(self, xml, identifier):
    """
    Load a JATS/NLM (PubMed) XML into a SciDoc.

    :param xml: full xml string
    :type xml: basestring
    :param identifier: an identifier for this document, e.g. file name
        If an actual full path, the path will be removed from it when stored
    :type identifier: basestring
    :returns: :class:`SciDoc <SciDoc>` object
    :rtype: SciDoc
    """
    # this solves a "bug" in BeautifulStoneSoup with "sec" tags
    BeautifulStoneSoup.NESTABLE_TAGS["sec"] = []

    #xml=fixNumberCitationsXML(xml)
    soup = BeautifulStoneSoup(xml)

    # Create a new SciDoc to store the paper
    newDocument = SciDoc()
    metadata = newDocument["metadata"]
    metadata["filename"] = os.path.basename(identifier)
    metadata["original_citation_style"] = detectCitationStyle(xml)

    body = soup.find("body")
    if not body:
        # TODO: Make the error handling less terrible
        debugAddMessage(newDocument, "error",
                        "NO <BODY> IN THIS PAPER! file: " + identifier)
        newDocument["metadata"]["guid"] = cp.Corpus.generateGUID()
        return newDocument

    # Load metadata, either from corpus or from file
    self.loadJATSMetadataFromPaper(newDocument, soup)
    metadata["guid"] = cp.Corpus.generateGUID(metadata)

    # Load all references from the XML
    back = soup.find("back")
    if back:
        ref_list = back.find("ref-list")
        # other things in <back> like appendices: ignore them for now
        if ref_list:
            for ref in ref_list.findAll("ref"):
                self.loadJATSReference(ref, newDocument)

    newDocument.updateReferences()

    # Load Abstract
    self.loadJATSAbstract(soup, newDocument)

    for sec in body.findChildren("sec", recursive=False):
        self.loadJATSSection(sec, newDocument, "root")

    newDocument.updateAuthorsAffiliations()
    return newDocument

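A short sketch of what the NESTABLE_TAGS tweak above changes, assuming BeautifulSoup 3's nesting rules: by default BeautifulStoneSoup treats a repeated tag as non-nestable, so an inner <sec> would implicitly close the outer one; registering 'sec' with an empty list lets JATS sections nest inside themselves.

from BeautifulSoup import BeautifulStoneSoup

nested = '<sec><title>A</title><sec><title>A.1</title></sec></sec>'
BeautifulStoneSoup.NESTABLE_TAGS['sec'] = []
soup = BeautifulStoneSoup(nested)
# With the tweak the inner section stays inside the outer one:
print len(soup.find('sec').findAll('sec'))  # 1 (0 without the tweak)
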
def login(self):
    """ Read greeting """
    greeting = self.read()
    soup = BeautifulStoneSoup(greeting)
    svid = soup.find('svid')
    version = soup.find('version')
    print("Connected to %s (v%s)\n" % (svid.text, version.text))

    """ Login """
    xml = commands.login % self.config
    if not self.cmd(xml):
        raise Exception('Error: Unable to login')

def auth(self):
    response = self.get_ticket_granting_ticket(self.USER, self.PASS)
    html = BeautifulSoup(response)
    tgt = html.body.form["action"]
    st = self.get_service_ticket(tgt)
    vld = self.validate_service(st)
    xml = BeautifulStoneSoup(vld)
    iou = xml.find('cas:proxygrantingticket').string \
        if xml.find('cas:proxygrantingticket') else None
    self.PGT = self.get_proxy_granting_ticket(iou)
    return self.PGT

def play(url=common.args.url):
    print "DEBUG Entering play function"
    swfUrl = 'http://can.cbs.com/thunder/player/chrome/canplayer.swf'
    if 'http://' in url:
        data = common.getURL(url)
        try:
            pid = re.compile('video.settings.pid = "(.*?)";').findall(data)[0]
        except:
            pid = re.compile("video.settings.pid = '(.*?)';").findall(data)[0]
    else:
        pid = url
    # OLD URL
    #url = "http://release.theplatform.com/content.select?format=SMIL&Tracking=true&balance=true&MBR=true&pid=" + pid
    url = "http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&Tracking=true&mbr=true" % pid
    if (common.settings['enableproxy'] == 'true'):
        proxy = True
    else:
        proxy = False
    data = common.getURL(url, proxy=proxy)
    tree = BeautifulStoneSoup(data, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    if (common.settings['enablesubtitles'] == 'true'):
        closedcaption = tree.find('param', attrs={'name': 'ClosedCaptionURL'})
        if (closedcaption is not None):
            xml_closedcaption = common.getURL(closedcaption['value'])
            convert_subtitles(xml_closedcaption, pid)
    rtmpbase = tree.find('meta')
    if rtmpbase:
        rtmpbase = rtmpbase['base']
    items = tree.find('switch').findAll('video')
    hbitrate = -1
    sbitrate = int(common.settings['quality']) * 1024
    for item in items:
        bitrate = int(item['system-bitrate'])
        if bitrate > hbitrate and bitrate <= sbitrate:
            hbitrate = bitrate
            playpath = item['src']
            if '.mp4' in playpath:
                playpath = 'mp4:' + playpath
            else:
                playpath = playpath.replace('.flv', '')
            finalurl = rtmpbase + ' playpath=' + playpath + " swfurl=" + swfUrl + " swfvfy=true"
    item = xbmcgui.ListItem(path=finalurl)
    xbmcplugin.setResolvedUrl(pluginhandle, True, item)
    if (common.settings['enablesubtitles'] == 'true') and (closedcaption is not None):
        while not xbmc.Player().isPlaying():
            print 'CBS--> Not Playing'
            xbmc.sleep(100)
        subtitles = os.path.join(common.pluginpath, 'resources', 'cache', pid + '.srt')
        print "CBS --> Setting subtitles"
        xbmc.Player().setSubtitles(subtitles)

def RESOLVE(index):  #, info
    doc = tools.gethtmlpage("%s/playlist/null/%s" % (ziln_urls["ZILN"], index))
    if doc:
        soup = BeautifulStoneSoup(doc)
        #tools.message(soup.find('media:content')["url"])
        #minidom.parseString(doc).documentElement.getElementsByTagName("media:content")[0].attributes["url"].value
        info = tools.defaultinfo(0)
        info["Title"] = soup.find('item').title.contents[0]
        info["Thumb"] = soup.find('jwplayer:image').contents[0]
        info["Plot"] = soup.find('description').contents[0]
        uri = "%s%s" % (ziln_urls["ZILN"], soup.find('media:content')["url"])
        tools.addlistitem(int(sys.argv[1]), info, ziln_urls["Fanart"], 0, 1, uri)

def _extract_pmid_links_from_xml(raw_xml):
    try:
        soup = BeautifulStoneSoup(raw_xml)
        # Verify we got a valid page
        assert(soup.find("elinkresult"))
        # Now get the linkset part
        linksetdb_soup = BeautifulStoneSoup(
            str(soup.find(text="pmc_pubmed").findParents('linksetdb'))[1:-1])
        pmids = list_of_tag_contents(linksetdb_soup, "id")
    except AttributeError:
        # No links found
        pmids = []
    return(pmids)

def update(self, prefix=""): #print "Updating "+str(self.number) if self.station_url == "": self.station_url = STATION_URL % self.city usock = urllib.urlopen(prefix + self.main_url + self.station_url + str(self.number)) xml_data = usock.read() usock.close() soup = BeautifulStoneSoup(xml_data) self.bikes = int(soup.find('available').contents[0]) self.free = int(soup.find('free').contents[0]) self.timestamp = datetime.now() return self
def get_menu(url=FEED_URL, tomorrow=False, die_on_closed=False):
    """
    Builds a Sharples menu from ``url``, returning a dictionary like this:

    {
        'closed': False,
        'message': "",
        'lunch': "beef vegetable soup, potato leek, ...",
        'dinner': "flank steak, baked stuffed potatoes, ..."
    }

    Note that we still return the menu if Sharples is closed, unless
    ``die_on_closed`` is set. If ``tomorrow`` is set, tries to figure out
    the menu for tomorrow.
    """
    try:
        page = urllib2.urlopen(url)
    except urllib2.URLError:
        # TODO: log this error somehow
        message = "Sorry, it seems we're having some technical difficulties " \
                  "with figuring out the Sharples menu. Try checking the " \
                  "Dashboard or the Sharples website."
        return {'closed': True, 'message': message}

    feed = BeautifulStoneSoup(page, selfClosingTags=['closed'])
    data = {}
    data['closed'] = feed.closed['value'] == "1"
    data['message'] = feed.message.string or ""
    if data['closed'] and die_on_closed:
        return data

    week = feed.find("week", {'currentwk': '1'})
    if tomorrow:
        day_name = (date.today() + timedelta(days=1)).strftime("%A")
        if day_name == "Saturday":
            num = int(week['value']) + 1
            if num > NUM_WEEKS:
                num = 1
            week = feed.find("week", {'value': str(num)})
    else:
        day_name = date.today().strftime("%A")

    for item in week.find("day", {'value': day_name}).findAll("item"):
        meal = item.meal.string.strip()
        if meal:
            data[meal.lower()] = br.sub("<br />", item.menu.string.strip())
    return data

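A usage sketch for get_menu; the fields printed below come from the dictionary shape documented in the docstring, and the fallback strings are illustrative.

menu = get_menu(tomorrow=True)
if menu['closed']:
    print menu['message']
else:
    print menu.get('lunch', 'no lunch listed')
    print menu.get('dinner', 'no dinner listed')
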
def parse(cls_, file_handle):
    ofx_obj = Ofx()
    ofx = BeautifulStoneSoup(file_handle)
    stmtrs_ofx = ofx.find('stmtrs')
    if stmtrs_ofx:
        ofx_obj.bank_account = cls_.parseStmtrs(stmtrs_ofx)
    else:
        # westpac has "CCSTMTRS"
        stmtrs_ofx = ofx.find('ccstmtrs')
        if stmtrs_ofx:
            ofx_obj.bank_account = cls_.parseStmtrs(stmtrs_ofx)
    return ofx_obj

def GET_RTMP(vid):
    #url = 'http://www.tbs.com/video/cvp/videoData.jsp?oid=' + vid
    url = 'http://www.tbs.com/tveverywhere/content/services/cvpXML.do?titleId=' + vid
    html = common.getURL(url)
    tree = BeautifulStoneSoup(html, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    #print tree.prettify()
    files = tree.findAll('file')
    if not files:
        url = 'http://www.tbs.com/tveverywhere/content/services/cvpXML.do?titleId=&id=' + vid
        html = common.getURL(url)
        tree = BeautifulStoneSoup(html, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
        #print tree.prettify()
        files = tree.findAll('file')
    if files:
        html = common.getURL(url)
        tree = BeautifulStoneSoup(html, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
        print tree.prettify()
        sbitrate = int(common.settings['quality'])
        hbitrate = -1
        files = tree.findAll('file')
        for filenames in files:
            try:
                bitrate = int(filenames['bitrate'])
            except:
                bitrate = 1
            if bitrate > hbitrate and bitrate <= sbitrate:
                hbitrate = bitrate
                filename = filenames.string
        serverDetails = tree.find('akamai')
        if serverDetails:
            filename = filename[1:len(filename) - 4]
            serverDetails = tree.find('akamai')
            server = serverDetails.find('src').string.split('://')[1]
            # get auth
            tokentype = serverDetails.find('authtokentype').string
            window = serverDetails.find('window').string
            aifp = serverDetails.find('aifp').string
            auth = getAUTH(aifp, window, tokentype, vid, filename.replace('mp4:', ''))
            swfUrl = 'http://www.tbs.com/cvp/tbs_video.swf'
            link = 'rtmpe://' + server + '?' + auth + " swfurl=" + swfUrl + " swfvfy=true" + ' playpath=' + filename
        elif 'http://' in filename:
            link = filename
        else:
            link = 'http://ht.cdn.turner.com/tbs/big' + filename
        return link

def parse_auth(soup, iview_config):
    """ There are lots of goodies in the auth handshake we get back,
    but the only ones we are interested in are the RTMP URL, the auth
    token, and whether the connection is unmetered.
    """
    xml = BeautifulStoneSoup(soup)

    # should look like "rtmp://203.18.195.10/ondemand"
    try:
        rtmp_url = xml.find('server').string

        # at time of writing, either 'Akamai' (usually metered) or 'Hostworks' (usually unmetered)
        stream_host = xml.find('host').string

        playpath_prefix = ''
        if stream_host == 'Akamai':
            playpath_prefix = config.akamai_playpath_prefix

        if rtmp_url is not None:
            # Being directed to a custom streaming server (i.e. for unmetered services).
            # Currently this includes Hostworks for all unmetered ISPs except iiNet.
            rtmp_chunks = rtmp_url.split('/')
            rtmp_host = rtmp_chunks[2]
            rtmp_app = rtmp_chunks[3]
        else:
            # We are a bland generic ISP using Akamai, or we are iiNet.
            rtmp_url = iview_config['rtmp_url']
            rtmp_host = iview_config['rtmp_host']
            rtmp_app = iview_config['rtmp_app']

        token = xml.find("token").string
        token = token.replace('&amp;', '&')  # work around BeautifulSoup bug
    except:
        d = xbmcgui.Dialog()
        d.ok('iView Error', 'There was an iView handshake error.', 'Please try again later')
        return None

    return {
        'rtmp_url': rtmp_url,
        'rtmp_host': rtmp_host,
        'rtmp_app': rtmp_app,
        'playpath_prefix': playpath_prefix,
        'token': token,
        'free': (xml.find("free").string == "yes")
    }

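A small sketch of the entity workaround above, assuming BeautifulSoup 3 behavior: BeautifulStoneSoup leaves character entities in text untouched unless a convertEntities argument is passed, so a token containing '&amp;' has to be unescaped by hand.

from BeautifulSoup import BeautifulStoneSoup

demo = BeautifulStoneSoup('<token>abc&amp;def</token>')
print demo.find('token').string                         # u'abc&amp;def'
print demo.find('token').string.replace('&amp;', '&')   # u'abc&def'
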
def PLAYVIDEO():
    #common.login()
    #orgin = 'http://dish.epixhd.com/epx/ajax/user/originstatus/'
    #print common.getURL(orgin,useCookie=True)
    #pageurl = 'http://www.epixhd.com/epx/ajax/theater/soloplayer' + common.args.url
    #print common.getURL(pageurl,useCookie=True)
    smilurl = 'http://www.epixhd.com/epx/smil' + common.args.url + 'smil.xml'
    data = common.getURL(smilurl, useCookie=True)
    authurl = 'http://www.epixhd.com/epx/ajax/theater/getToken/?movie=' + common.args.url.strip('/')
    auth = common.getURL(authurl, useCookie=True)
    tree = BeautifulStoneSoup(data, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    print tree.prettify()
    stackedUrl = 'stack://'
    if common.addon.getSetting("enablepreroll") == 'true':
        for preroll in tree.find('img').findAll('video', recursive=False):
            stackedUrl += buildrtmp(preroll['src'], auth).replace(',', ',,') + ' , '
    quality = [0, 3000000, 2200000, 1700000, 1200000, 900000, 500000]
    lbitrate = quality[int(common.addon.getSetting("bitrate"))]
    mbitrate = 0
    streams = []
    movie_name = tree.find('mbrstream')['ma:asset_name']
    common.args.asset_id = tree.find('mbrstream')['ma:asset_id']
    for item in tree.find('mbrstream').findAll('video'):
        url = item['src']
        bitrate = int(item['system-bitrate'])
        if lbitrate == 0:
            streams.append([bitrate / 1000, url])
        elif bitrate >= mbitrate and bitrate <= lbitrate:
            mbitrate = bitrate
            rtmpdata = url
    if lbitrate == 0:
        quality = xbmcgui.Dialog().select('Please select a quality level:',
                                          [str(stream[0]) + 'kbps' for stream in streams])
        if quality != -1:
            rtmpdata = streams[quality][1]
        else:
            return
    stackedUrl += buildrtmp(rtmpdata, auth).replace(',', ',,')
    #p = ResumePlayer()
    item = xbmcgui.ListItem(path=stackedUrl)
    #item.setInfo(type="Video", infoLabels={"Title": movie_name})
    xbmcplugin.setResolvedUrl(pluginhandle, True, item)
    #while not p.isPlaying():
    #    print 'EPIX --> Not Playing'
    #    xbmc.sleep(100)
    #p.onPlayBackStarted()

def find_series_id(name):
    """Looks up the tvdb id for a series"""
    url = server + 'GetSeries.php?seriesname=%s&language=%s' % (
        urllib.quote(name), language)
    try:
        page = requests.get(url).content
    except RequestException as e:
        raise LookupError("Unable to get search results for %s: %s" % (name, e))
    xmldata = BeautifulStoneSoup(
        page, convertEntities=BeautifulStoneSoup.HTML_ENTITIES).data
    if not xmldata:
        log.error("Didn't get a return from tvdb on the series search for %s" % name)
        return
    # See if there is an exact match
    # TODO: Check if there are multiple exact matches
    firstmatch = xmldata.find('series')
    if firstmatch and firstmatch.seriesname.string.lower() == name.lower():
        return int(firstmatch.seriesid.string)
    # If there is no exact match, sort by airing date and pick the latest
    # TODO: Is there a better way to do this? Maybe weight name similarity and air date
    series_list = [(s.firstaired.string, s.seriesid.string)
                   for s in xmldata.findAll('series', recursive=False) if s.firstaired]
    if series_list:
        series_list.sort(key=lambda s: s[0], reverse=True)
        return int(series_list[0][1])
    else:
        raise LookupError('No results for `%s`' % name)

def parse_netbios(file_to_read):
    parsed = BeautifulStoneSoup(file(file_to_read).read())
    adapters = parsed.findAll('adapter')
    if adapters:
        call_name = parsed.find('callname').string
        if call_name[0].isdigit():
            ip_address = unicode(call_name.strip())
        else:
            ip_address = None
        netbios_list = []
        for adapter in adapters:
            mac_address = adapter['adapter_addr'].replace('.', ':').strip()
            names_list = adapter.findAll('names')
            host_name = None
            domain_name = None
            for names_elements in names_list:
                type = names_elements.find('type')
                name = names_elements.find('name')
                if (type.string == 'Workstation Service'):
                    host_name = unicode(name.string.strip()).lower()
                elif (type.string == 'Domain Name'):
                    domain_name = unicode(name.string.strip()).lower()
            netbios_list += [{
                'host_name': host_name,
                'domain_name': domain_name,
                'ip_address': ip_address,
                'mac_address': mac_address
            }]
        return netbios_list

class Extract():
    def __init__(self, xml, filename, game_name, away_team, home_team):
        self.xml = xml
        self.game_name = game_name
        self.filename = filename
        self.soup = BeautifulStoneSoup(self.xml)
        self.home_team = home_team
        self.away_team = away_team

    def extract(self):
        plays = self.splitRowsIntoPlays()
        row_indexes = self.getPeriodIndexes()
        indexed_plays = self.combinePlaysWithPeriodIndexes(row_indexes, plays)
        self.dumpToFile(indexed_plays)

    def getGameData(self):
        gamedata = self.soup.find("game")
        print gamedata.attrs

    def getPlayByPlayData(self):
        playbyplaydata = self.soup.findAll("event")
        for play in playbyplaydata:
            print dict(play.attrs)

    def dumpToFile(self, list_data):
        writer = csv.writer(open(LOGDIR_EXTRACT + self.filename + '_pbp_nbacom', 'wb'),
                            delimiter=',', lineterminator='\n')
        writer.writerows(list_data)

def __init__(self, xml_path, deftype):
    self.type = deftype
    xml_file = open(xml_path)
    xml = xml_file.read()
    xml = xml.replace('<computeroutput>', '`').replace('</computeroutput>', '`')
    # TODO(josh11b): Should not use HTML entities inside ```...```.
    soup = BeautifulStoneSoup(xml, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    self.name = soup.find('compoundname').text
    print('Making page with name ' + self.name + ' (from ' + xml_path + ')')
    members = soup('memberdef', prot='public')
    briefs = all_briefs(members)
    fulls = all_fulls(members)
    self.overview = page_overview(soup.find('compounddef'))
    self.page_text = PAGE_TEMPLATE.format(self.type, self.name, self.overview, fulls)