def GetRating(self, id):
    self.GetInfo(id)
    url = 'http://rating.kinopoisk.ru/' + str(id) + '.xml'
    Data = Get_url(url)
    if Data:
        xml = BeautifulStoneSoup(Data)
        try:
            kp = xml.find('kp_rating')
            r_kp = kp.string.encode('UTF-8')
            v_kp = kp['num_vote'].encode('UTF-8')
        except:
            r_kp = '-'
            v_kp = '-'
        try:
            imdb = xml.find('imdb_rating')
            r_imdb = imdb.string.encode('UTF-8')
            v_imdb = imdb['num_vote'].encode('UTF-8')
        except:
            r_imdb = '-'
            v_imdb = '-'
        return r_kp, v_kp, r_imdb, v_imdb
    else:
        return '-', '-', '-', '-'

def play():
    smilurl = common.args.url + '&format=SMIL'
    data = common.getURL(smilurl)
    tree = BeautifulStoneSoup(data, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    print tree.prettify()
    base = tree.find('meta')
    if base:
        base = base['base']
        if 'rtmp://' in base:
            playpath = tree.find('ref')['src']
            if '.mp4' in playpath:
                playpath = 'mp4:' + playpath
            else:
                playpath = playpath.replace('.flv', '')
            finalurl = base + ' playpath=' + playpath
        elif 'http://' in base:
            playpath = tree.find('ref')['src']
            finalurl = base + playpath
    else:
        finalurl = tree.find('ref')['src']
    item = xbmcgui.ListItem(path=finalurl)
    xbmcplugin.setResolvedUrl(pluginhandle, True, item)

def categoryWorker(thread_num, l):
    while (True and processCategories):
        department = category_queue.get()
        sys.stderr.write('Parsing categories for FY %s department %s, division %s at %s\n' % (
            department['fiscal_year'], department['department_name'],
            department['division_name'], department['division_detail_page_url']))
        my_mech = mechanize.Browser()
        category_request = my_mech.open(department['division_detail_page_url'])
        category_response = category_request.read()
        category_soup = BeautifulStoneSoup(category_response,
                                           convertEntities=BeautifulStoneSoup.ALL_ENTITIES)
        category_table = category_soup.find('table', id='grdAgency')
        category_page_dropdown = category_soup.find('select', id='MozillaPager1_ddlPageNumber')
        category_pages = []
        if category_page_dropdown:
            for category_page in category_page_dropdown.findAll('option'):
                category_pages.append(category_page['value'])
        else:
            sys.stderr.write("No page drop down on %s.\n" % department['division_detail_page_url'])
        department['categories'].extend(parseCategoryTable(category_response))
        for page in category_pages[1:]:
            sys.stderr.write(' ... Page %s from %s\n' % (page, department['division_detail_page_url']))
            my_mech.select_form('ctl02')
            my_mech.form.set_value([page], 'MozillaPager1$ddlPageNumber')
            category_page_request = my_mech.form.click('MozillaPager1$btnPageNumber')
            local_categories = parseCategoryTable(mechanize.urlopen(category_page_request).read())
            department['categories'].extend(local_categories)
        category_queue.task_done()

def extract_spectrum_info(filename):
    # Open the file
    with open(filename, 'r') as f:
        xml = f.read()
    # Extract the info
    soup = BeautifulStoneSoup(xml)
    info = {'id': soup.find('id').contents[0],
            'database-id': soup.find('database-id').contents[0],
            'inchi-key': soup.find('inchi-key').contents[0],
            'solvent': soup.find('solvent').contents[0],
            'sample-ph': soup.find('sample-ph').contents[0],
            'frequency': soup.find('frequency').contents[0]}
    # Enforce spectrum validity rules
    if (info['solvent'] == VALID_SOLVENT and
            float(info['sample-ph']) <= MAX_PH and
            float(info['sample-ph']) >= MIN_PH):
        # Get the inchi code
        try:
            info['inchi-code'] = \
                urllib2.urlopen(INCHI_RESOLVER_URL.format(info['inchi-key'])).read()
            time.sleep(TIME_BETWEEN_URL_REQUESTS)
            return info
        except:
            # Any exception, don't care which
            return None
    else:
        return None

def image_path_with_fgdc_to_world_file(image_path, world_file, srs, units="m"):
    image = Image.open(image_path)
    (width, height) = image.size
    xml_path = "%s.xml" % (os.path.splitext(image_path)[0])
    with open(xml_path, "r") as f:
        xml = BeautifulStoneSoup(f)
    north_bound = float(xml.find("northbc").text)
    west_bound = float(xml.find("westbc").text)
    south_bound = float(xml.find("southbc").text)
    east_bound = float(xml.find("eastbc").text)
    srs = "%s" % (srs)
    if not srs.startswith("EPSG:"):
        srs = "EPSG:%s" % (srs)
    (west_bound, north_bound) = latlng_to_srs(north_bound, west_bound, srs, units)
    (east_bound, south_bound) = latlng_to_srs(south_bound, east_bound, srs, units)
    x_pixel_width = (east_bound - west_bound) / width
    y_pixel_width = (south_bound - north_bound) / height
    for l in [x_pixel_width, 0, 0, y_pixel_width, west_bound, north_bound]:
        world_file.write("%s\n" % l)
    return world_file

def categoryDetailWorker(thread_num, l):
    while (True and processCategoryDetails):
        category = category_details_queue.get()
        sys.stderr.write('Parsing category details for %s at %s\n' % (
            category['name'], category['detail_url']))
        my_mech = mechanize.Browser()
        category_detail_request = my_mech.open(category['detail_url'])
        category_detail_response = category_detail_request.read()
        category_detail_soup = BeautifulStoneSoup(category_detail_response,
                                                  convertEntities=BeautifulStoneSoup.ALL_ENTITIES)
        category_detail_table = category_detail_soup.find('table', id='grdCategories')
        category_detail_page_dropdown = category_detail_soup.find('select',
                                                                  id='MozillaPager1_ddlPageNumber')
        category_detail_pages = []
        if category_detail_page_dropdown:
            for category_detail_page in category_detail_page_dropdown.findAll('option'):
                category_detail_pages.append(category_detail_page['value'])
        else:
            sys.stderr.write("No page drop down on %s.\n" % category['detail_url'])
        category_details = parseCategoryDetailTable(category_detail_response)
        l.acquire()
        category['details'].extend(category_details)
        l.release()
        for page in category_detail_pages[1:]:
            sys.stderr.write(' ... Page %s from %s\n' % (page, category['detail_url']))
            my_mech.select_form('ctl02')
            my_mech.form.set_value([page], 'MozillaPager1$ddlPageNumber')
            category_detail_page_request = my_mech.form.click('MozillaPager1$btnPageNumber')
            category_details = parseCategoryDetailTable(
                mechanize.urlopen(category_detail_page_request).read())
            l.acquire()
            category['details'].extend(category_details)
            l.release()
        category_details_queue.task_done()

def get_weather(location):
    degree = '°'.decode('utf8')
    conditions = {}
    base_url = 'http://api.wunderground.com/auto/wui/geo/WXCurrentObXML/index.xml?query='
    try:
        # urllib.urlopen: this snippet is Python 2 throughout
        # (str.decode above only exists in Python 2).
        page = urllib.urlopen(base_url + location)
    except:
        return 'Could not open the page!'
    else:
        soup = BeautifulStoneSoup(page)
        conditions['location'] = soup.find('full').contents[0]
        if 2 >= len(conditions['location']):
            return 'Inexistent location: ' + location
        else:
            conditions['weather'] = soup.find('weather').contents[0]
            conditions['temp'] = soup.find('temperature_string').contents[0]
            pos = conditions['temp'].find(' ')
            conditions['temp'] = conditions['temp'][:pos] + degree + \
                conditions['temp'][pos:]
            pos = conditions['temp'].rfind(' ')
            conditions['temp'] = conditions['temp'][:pos] + degree + \
                conditions['temp'][pos:]
    page.close()
    return conditions

def getXmlCursor(xml):
    """Parse the XML and extract the current query's start index,
    items per page, and total result count."""
    soup = BeautifulStoneSoup(xml)
    start = int(soup.find('opensearch:startindex').string)
    count = int(soup.find('opensearch:itemsperpage').string)
    totalCount = int(soup.find('opensearch:totalresults').string)
    return (start, count, totalCount)

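A minimal usage sketch for getXmlCursor; the OpenSearch-style fragment and its values below are illustrative, not from the source. BeautifulStoneSoup is built on SGMLParser, which lowercases tag names during parsing, so the camel-cased feed tags are matched by their lowercase forms.

sample = """<feed>
  <openSearch:startIndex>1</openSearch:startIndex>
  <openSearch:itemsPerPage>20</openSearch:itemsPerPage>
  <openSearch:totalResults>97</openSearch:totalResults>
</feed>"""
# Tag names are lowercased by the parser, which is why the function
# searches for 'opensearch:startindex' and friends.
start, count, totalCount = getXmlCursor(sample)  # (1, 20, 97)
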
def play(url=common.args.url, playrtmp=True):
    # GET DETAILS FROM API
    #url = build_api_url('details','',ID=id,ios=True)
    #data = common.getURL(url)
    #if data:
    #    for mediaUrl in demjson.decode(data)['MediaURLs']:
    #        if mediaUrl['type'] == '480p_1mbps.mp4':
    #            finalurl=mediaUrl['path']
    # GET ID FROM HTTP PAGE
    #data = common.getURL(url)
    #id,paremeters=re.compile("StartPlayer \((.+?), '(.+?)',").findall(data)[0]
    # Get file path
    vidwall = 'http://www.crackle.com/app/vidwall.ashx?flags=-1&fm=%s&partner=20' % url
    data = common.getURL(vidwall)
    tree = BeautifulStoneSoup(data, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    filepath = tree.find('i')['p']
    if playrtmp:
        # RTMP STREAMING
        rtmpbase = tree.find('channel')['strrtmpcdnurl']
        swfUrl = 'http://www.crackle.com/flash/ReferrerRedirect.ashx'
        finalurl = 'rtmp://' + rtmpbase + '/' + filepath + '480p_1mbps.mp4' + ' swfurl=' + swfUrl + " swfvfy=true"
    else:
        # HTTP STREAMING
        finalurl = 'http://media-us-am.crackle.com/' + filepath + '480p_1mbps.mp4'
    item = xbmcgui.ListItem(path=finalurl)
    return xbmcplugin.setResolvedUrl(pluginhandle, True, item)

def parse_config(soup):
    """There are lots of goodies in the config we get back from the ABC.
    In particular, it gives us the URLs of all the other XML data we need.
    """
    soup = soup.replace('&amp;', '&')
    xml = BeautifulStoneSoup(soup)

    # should look like "rtmp://cp53909.edgefcs.net/ondemand"
    # Looks like the ABC don't always include this field.
    # If not included, that's okay -- ABC usually gives us the server in the auth result as well.
    rtmp_url = xml.find('param', attrs={'name': 'server_streaming'}).get('value')
    rtmp_chunks = rtmp_url.split('/')

    return {
        'rtmp_url': rtmp_url,
        'rtmp_host': rtmp_chunks[2],
        'rtmp_app': rtmp_chunks[3],
        'api_url': xml.find('param', attrs={'name': 'api'}).get('value'),
        'categories_url': xml.find('param', attrs={'name': 'categories'}).get('value'),
    }

def geocode(address="", city="", state="CA"):
    address = urllib.quote(address.encode('utf-8'))
    g_url = 'http://local.yahooapis.com/MapsService/V1/geocode?appid='
    g_url += '0MoPk9DV34FH0rumXB_xENjSlf.jdG4woRO9nFqyUcM86nLsFSynUvAwZZo6g--'
    g_url += '&street=%s&city=%s&state=%s' % (address, city, state)
    url = urllib.urlopen(g_url)
    dom = BeautifulStoneSoup(url)
    url.close()
    coords = {
        'address': None,
        'latitude': None,
        'longitude': None,
    }
    result_attr = dom.find('result')
    if result_attr and result_attr['precision'] == 'address':
        dom_fields = ['address', 'latitude', 'longitude']
        for field in dom_fields:
            i = dom.find(field)
            if i:
                if field == 'address':
                    coords[field] = i.string
                else:
                    try:
                        coords[field] = float(i.string)
                    except:
                        pass
    return coords

def cablelogin(selected):
    if os.path.isfile(COOKIEFILE):
        if addon.getSetting("clearcookies") == 'true':
            os.remove(COOKIEFILE)
        else:
            return
    data = getURL('http://www.epixhd.com/epx/ajax/chooseMSO/?mso_id=' + selected)
    jsondata = demjson.decode(data)
    tree = BeautifulStoneSoup(jsondata['content'],
                              convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    try:
        signinUrl = tree.find('iframe')['src']
        provider = tree.find('iframe')['class']
    except:
        signinUrl = re.compile(
            '<script language="javascript">self.parent.location="(.*?)";'
        ).findall(jsondata['content'])[0]
        provider = 'cox'
    br = mechanize.Browser()
    br.set_handle_robots(False)
    br.set_cookiejar(cj)
    br.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.17) Gecko/20110422 Ubuntu/10.10 (maverick) Firefox/3.6.17'
    )]
    sign_in = br.open(signinUrl)
    if provider == 'charter':
        br.select_form(name="f")
        br["username"] = addon.getSetting("login_name")
        br["password"] = addon.getSetting("login_pass")
        br["zipcode"] = addon.getSetting("zipcode")
    elif provider == 'cox':
        br.select_form(name="LoginPage")
        br["username"] = addon.getSetting("login_name")
        br["password"] = addon.getSetting("login_pass")
    elif provider == 'dish':
        br.select_form(name="f")
        br["username"] = addon.getSetting("login_name")
        br["password"] = addon.getSetting("login_pass")
    elif provider == 'mediacom':
        br.select_form(name="f")
        br["username"] = addon.getSetting("login_name")
        br["password"] = addon.getSetting("login_pass")
    elif provider == 'suddenlink':
        br.select_form(name="f")
        br["username"] = addon.getSetting("login_name")
        br["password"] = addon.getSetting("login_pass")
    elif provider == 'verizon':
        br.select_form(name="loginpage")
        br["IDToken1"] = addon.getSetting("login_name")
        br["IDToken2"] = addon.getSetting("login_pass")
    br.submit()
    br.select_form(nr=0)
    response = br.submit()
    data = response.read()
    redirect = 'http://www.epixhd.com' + re.compile(
        'self.parent.location="(.*?)"').findall(data)[0]
    print getURL(redirect)
    cj.save(COOKIEFILE, ignore_discard=True, ignore_expires=True)

def parse_config(soup):
    """There are lots of goodies in the config we get back from the ABC.
    In particular, it gives us the URLs of all the other XML data we need.
    """
    soup = soup.replace('&amp;', '&')
    xml = BeautifulStoneSoup(soup)

    # should look like "rtmp://cp53909.edgefcs.net/ondemand"
    # Looks like the ABC don't always include this field.
    # If not included, that's okay -- ABC usually gives us the server in the auth result as well.
    rtmp_url = xml.find('param', attrs={'name': 'server_streaming'}).get('value')
    rtmp_chunks = rtmp_url.split('/')

    return {
        'rtmp_url': rtmp_url,
        'rtmp_host': rtmp_chunks[2],
        'rtmp_app': rtmp_chunks[3],
        'auth_url': xml.find('param', attrs={'name': 'auth'}).get('value'),
        'api_url': xml.find('param', attrs={'name': 'api'}).get('value'),
        'categories_url': xml.find('param', attrs={'name': 'categories'}).get('value'),
        'captions_url': xml.find('param', attrs={'name': 'captions'}).get('value'),
    }

def xml_to_dict(self, data):
    from BeautifulSoup import BeautifulStoneSoup as BS
    soup = BS(data)
    username = soup.find('db:uid').contents[0]
    uid = soup.find('id').contents[0].split('/')[-1]
    title = soup.find('title').contents[0]
    return {'id': uid, 'username': username, 'title': title}

def handle(self, *args, **kwargs):
    resource = urlopen(TRAIL_REPORT_URL)
    soup = BeautifulStoneSoup(resource)
    lift = soup.find("lifts")
    cache.set(TRAIL_REPORT_CACHE_KEY, {
        "total": lift.get("total"),
        "open": lift.get("totalopen"),
    }, 7 * 24 * 60 * 60)

    resource = urlopen(WEATHER_REPORT_URL)
    soup = BeautifulStoneSoup(resource)
    report = soup.findAll("report")[1]
    forecast = []
    weather_data = {
        "temperature": report.get("temp"),
        "surface": report.get("surfaceconditions"),
    }
    for i in range(1, 5):
        day = soup.find("day%d" % i)
        if day:
            forecast.append({
                "day": day.get("day"),
                "status": WEATHER_TYPES[int(day.get("icon"))],
            })
    weather_data["forecast"] = forecast
    cache.set(WEATHER_REPORT_CACHE_KEY, weather_data, 7 * 24 * 60 * 60)

def QueueStatus(cls, api_key, _hash):
    template_queue = \
"""
<?xml version="1.0" encoding="utf-8" ?>
<queue>
    <apiKey>%s</apiKey>
    <hash>%s</hash>
</queue>
""" % (api_key, _hash)
    site = "http://api.online-convert.com/queue-status"
    d = dict()
    d["queue"] = template_queue
    params = urllib.urlencode(d)
    conn = urllib.urlopen(site, params)
    data = conn.read()
    # d = dict()
    soup = BeautifulStoneSoup(data)
    try:
        d["code"] = soup.find("code").contents[0]
    except:
        d["code"] = -1
    try:
        d["download_counter"] = soup.find("downloadcounter").contents[0]
    except:
        d["download_counter"] = -1
    try:
        d["date_processed"] = soup.find("dateprocessed").contents[0]
    except:
        d["date_processed"] = -1
    try:
        d["direct_download"] = soup.find("directdownload").contents[0]
    except:
        d["direct_download"] = -1
    try:
        d["source_checksum"] = soup.find("source_checksum").contents[0]
    except:
        d["source_checksum"] = -1
    try:
        d["checksum"] = soup.find("checksum").contents[0]
    except:
        d["checksum"] = -1
    try:
        d["target_size"] = soup.find("target_size").contents[0]  # in KB
    except:
        d["target_size"] = -1
    try:
        d["target_type"] = soup.find("convert_to").contents[0]
    except:
        d["target_type"] = -1
    try:
        d["mime_type"] = soup.find("mime_type").contents[0]
    except:
        d["mime_type"] = -1
    try:
        d["hash"] = soup.find("hash").contents[0]
    except:
        d["hash"] = -1
    return d

def updateStatusPriority(self):
    url = r"http://code.google.com/feeds/issues/p/ankidroid/issues/full?id=" + str(
        self.issueName)
    updated = False
    try:
        result = fetch(url)
        if result.status_code == 200:
            soup = BeautifulStoneSoup(result.content)
            status = soup.find('issues:status')
            if status:
                self.status = unicode(status.string)
                updated = True
                logging.debug("Setting status to '" + self.status + "'")
            priority = soup.find(name='issues:label',
                                 text=re.compile(r"^Priority-.+$"))
            if priority:
                self.priority = re.search("^Priority-(.+)$",
                                          unicode(priority.string)).group(1)
                updated = True
                logging.debug("Setting priority to '" + self.priority + "'")
    except Error, e:
        logging.error("Error while retrieving status and priority: %s" % str(e))

def episodes(url):
    print "Episodes: " + url
    req = urllib2.Request(url)
    req.add_header('User-Agent', userAgent)
    response = urllib2.urlopen(req)
    link = response.read()
    soup = BeautifulStoneSoup(link)
    response.close()
    episodes = re.compile('<a href="(.+?)">\s*<img src="(.+?)".*>\s*<h3>(.+?)</h3>').findall(link)
    count = len(episodes)
    prevPage = soup.find('a', text="<")
    if (prevPage != None):
        count = count + 1
        addDir("<< Previous Page", prevPage.parent['href'], 1, '', count)
    for href, thumb, name in episodes:
        addLink(name, site + href, site + thumb, count)
    nextPage = soup.find('a', text=">")
    if (nextPage != None):
        addDir("Next Page >>", nextPage.parent['href'], 1, '', count)

def parse_config(file_to_read):
    parsed = BeautifulStoneSoup(open(file_to_read).read())
    adapters = parsed.findAll('adapter')
    if (not adapters):
        adapters = parsed.findAll('interface')
    host_tag = parsed.find('hostname')
    if host_tag:
        host_name = host_tag.string.lower()
    else:
        host_name = None
    domain_tag = parsed.find('domainname')
    if domain_tag:
        domain_name = domain_tag.string
        if domain_name:
            domain_name = domain_name.lower()
    else:
        domain_name = None
    ip_list = []
    for adapter in adapters:
        mac = (adapter.find('address').string if adapter.find('address') else None)
        if mac:
            mac = mac.replace('-', ':').lower()
        adapter_ips = adapter.findAll('adapterip')
        for adapter_ip_node in adapter_ips:
            if (not adapter_ip_node):
                continue
            ip = ''
            for ip_address in adapter_ip_node.find('ip'):
                ip = ip_address.string.strip()
                if (not ip):
                    continue
                info = {'host_name': host_name,
                        'domain_name': domain_name,
                        'ip_address': ip,
                        'mac_address': mac}
                if ((info not in ip_list) and (ip != '127.0.0.1') and (':' not in ip)):
                    ip_list.append(info)
    return ip_list

def process_save_order_response(response, ad_rep_order, order_dict):
    """ Validate response from save order soap call. If success, a firestorm
    order id will be saved to the ad_rep order.
    """
    LOG.debug('SaveOrder Response: %s' % response)
    soup = BeautifulStoneSoup(response)
    LOG.debug(soup.prettify())
    if soup.find('status').string == 'SUCCESS':
        firestorm_order_id = soup.find('id').string
        LOG.debug("Success: Firestorm Order ID: %s" % firestorm_order_id)
        try:
            ad_rep_order = AdRepOrder.objects.get(order=ad_rep_order.order)
            ad_rep_order.firestorm_order_id = int(firestorm_order_id)
            ad_rep_order.save()
        except ValueError:
            LOG.error("Invalid Firestorm Order ID: %s" % firestorm_order_id)
    else:
        LOG.error('AdRepOrder %s order_dict: %s' % (ad_rep_order.order, order_dict))
        errors = ''
        for error_msg in soup.findAll('errormsg'):
            LOG.error(error_msg.string)
            errors += error_msg.string + '. '
        error_message = 'SaveOrder: %s error: %s' % (ad_rep_order.order, errors)
        LOG.error(error_message)
        raise ValidationError(error_message)

def vendorWorker(thread_num, l):
    while (True and processVendors):
        detail = vendors_queue.get()
        sys.stderr.write('Parsing vendors for category detail for %s at %s\n' % (
            detail['detail_description'], detail['vendor_url']))
        my_mech = mechanize.Browser()
        vendor_request = my_mech.open(detail['vendor_url'])
        vendor_response = vendor_request.read()
        vendor_soup = BeautifulStoneSoup(vendor_response,
                                         convertEntities=BeautifulStoneSoup.ALL_ENTITIES)
        vendor_table = vendor_soup.find('table', id='grdAgency')
        vendor_page_dropdown = vendor_soup.find('select', id='MozillaPager1_ddlPageNumber')
        vendor_pages = []
        if vendor_page_dropdown:
            for vendor_page in vendor_page_dropdown.findAll('option'):
                vendor_pages.append(vendor_page['value'])
        else:
            sys.stderr.write("No page drop down on %s.\n" % detail['vendor_url'])
        vendors = parseVendorTable(vendor_response)
        l.acquire()
        detail['vendors'].extend(vendors)
        l.release()
        for page in vendor_pages[1:]:
            sys.stderr.write(' ... Page %s from %s\n' % (page, detail['vendor_url']))
            my_mech.select_form('ctl02')
            my_mech.form.set_value([page], 'MozillaPager1$ddlPageNumber')
            vendor_page_request = my_mech.form.click('MozillaPager1$btnPageNumber')
            vendors = parseVendorTable(mechanize.urlopen(vendor_page_request).read())
            l.acquire()
            detail['vendors'].extend(vendors)
            l.release()
        vendors_queue.task_done()

def tv_db_id_lookup(seriesid, seriesname):
    tv_api_key = '03B8C17597ECBD64'
    mirror = 'http://thetvdb.com'
    banners = 'http://thetvdb.com/banners/'
    if seriesid:
        series_xml = mirror + ('/api/%s/series/%s/en.xml' % (tv_api_key, seriesid))
        series_xml = common.getURL(series_xml)
        tree = BeautifulStoneSoup(series_xml,
                                  convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
        try:
            genre = tree.find('genre').string
            genre = genre.replace("|", ",")
            genre = genre.strip(",")
        except:
            print '%s - Genre Failed' % seriesname
            genre = None
        try:
            aired = tree.find('firstaired').string
        except:
            print '%s - Air Date Failed' % seriesname
            aired = None
        try:
            banner = banners + tree.find('banner').string
        except:
            print '%s - Banner Failed' % seriesname
            banner = None
        try:
            fanart = banners + tree.find('fanart').string
        except:
            print '%s - Fanart Failed' % seriesname
            fanart = None
        try:
            poster = banners + tree.find('poster').string
        except:
            print '%s - Poster Failed' % seriesname
            poster = None
        return banner, poster, fanart, genre, aired, seriesid
    else:
        return None, None, None, None, None, None

def parse(cls_, file_handle, fail_fast=True):
    '''
    parse is the main entry point for an OfxParser. It takes a file
    handle and an optional log_errors flag.

    If fail_fast is True, the parser will fail on any errors.
    If fail_fast is False, the parser will log poor statements in the
    statement class and continue to run. Note: the library does not
    guarantee that no exceptions will be raised to the caller, only
    that statements will include bad transactions (which are marked).
    '''
    cls_.fail_fast = fail_fast

    if isinstance(file_handle, type('')):
        raise RuntimeError(u"parse() takes in a file handle, not a string")

    ofx_obj = Ofx()

    # Store the headers
    ofx_file = OfxFile(file_handle)
    ofx_obj.headers = ofx_file.headers
    ofx_obj.accounts = []

    ofx = BeautifulStoneSoup(ofx_file.fh)
    if len(ofx.contents) == 0:
        raise OfxParserException('The ofx file is empty!')

    stmtrs_ofx = ofx.findAll('stmtrs')
    if stmtrs_ofx:
        ofx_obj.accounts += cls_.parseStmtrs(stmtrs_ofx, AccountType.Bank)

    ccstmtrs_ofx = ofx.findAll('ccstmtrs')
    if ccstmtrs_ofx:
        ofx_obj.accounts += cls_.parseStmtrs(ccstmtrs_ofx, AccountType.CreditCard)

    invstmtrs_ofx = ofx.findAll('invstmtrs')
    if invstmtrs_ofx:
        ofx_obj.accounts += cls_.parseInvstmtrs(invstmtrs_ofx)

    seclist_ofx = ofx.find('seclist')
    if seclist_ofx:
        ofx_obj.security_list = cls_.parseSeclist(seclist_ofx)
    else:
        ofx_obj.security_list = None

    acctinfors_ofx = ofx.find('acctinfors')
    if acctinfors_ofx:
        ofx_obj.accounts += cls_.parseAcctinfors(acctinfors_ofx, ofx)

    fi_ofx = ofx.find('fi')
    if fi_ofx:
        for account in ofx_obj.accounts:
            account.institution = cls_.parseOrg(fi_ofx)

    if ofx_obj.accounts:
        ofx_obj.account = ofx_obj.accounts[0]

    return ofx_obj

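A minimal call sketch for the parser above, assuming parse is exposed as a classmethod on an OfxParser class (as the docstring suggests); the class name and the statement.ofx path are assumptions for illustration.

# Hypothetical usage; 'OfxParser' and 'statement.ofx' are assumed names.
with open('statement.ofx', 'rb') as fh:
    ofx = OfxParser.parse(fh, fail_fast=False)  # keep going past bad statements
print len(ofx.accounts)  # bank, credit-card and investment accounts combined
print ofx.headers        # header dict captured by OfxFile
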
def extract_tokens(self, html):
    soup = BeautifulStoneSoup(
        html, convertEntities=BeautifulStoneSoup.ALL_ENTITIES)
    title = soup.find('title').text
    self.title = title
    body = soup.find('body')
    tokens = self.find_tokens(body)
    self.tokens = tokens

def getParams():
    query = scrape(queryPage)
    querySoup = BeautifulStoneSoup(query,
                                   convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    flatTypes = [(tag['value'], tag.contents[0])
                 for tag in querySoup.find('select', {'name': 'FLAT_TYPE'}).findAll('option')
                 if len(tag['value']) > 0]
    towns = [tag['value']
             for tag in querySoup.find('select', {'name': 'NME_NEWTOWN'}).findAll('option')
             if len(tag['value']) > 0]
    dates = [tag['value']
             for tag in querySoup.find('select', {'name': 'DTE_APPROVAL_FROM'}).findAll('option')
             if len(tag['value']) > 0]
    return (dict(flatTypes), towns, dates)

def getVideoURL(self, smilUrl):
    data = urllib2.urlopen(smilUrl).read()
    htmlTree = BeautifulStoneSoup(data,
                                  convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    base = htmlTree.find('meta')['base']
    filepath = htmlTree.find('video')['src']
    url = base + " playpath=" + filepath
    return url

def update(self):
    usock = urllib.urlopen(self.main_url + self.station_url + str(self.number))
    xml_data = usock.read()
    usock.close()
    soup = BeautifulStoneSoup(xml_data)
    self.bikes = int(soup.find('bikes').contents[0])
    self.free = int(soup.find('attachs').contents[0])
    self.timestamp = datetime.now()
    return self

def play(url=common.args.url):
    print "DEBUG Entering play function"
    swfUrl = 'http://can.cbs.com/thunder/player/chrome/canplayer.swf'
    if 'http://' in url:
        data = common.getURL(url)
        try:
            pid = re.compile('var pid = "(.*?)";').findall(data)[0]
        except:
            pid = re.compile("var pid = '(.*?)';").findall(data)[0]
    else:
        pid = url
    # OLD URL
    #url = "http://release.theplatform.com/content.select?format=SMIL&Tracking=true&balance=true&MBR=true&pid=" + pid
    url = "http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&Tracking=true&mbr=true" % pid
    if (common.settings['enableproxy'] == 'true'):
        proxy = True
    else:
        proxy = False
    data = common.getURL(url, proxy=proxy)
    tree = BeautifulStoneSoup(data, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    if (common.settings['enablesubtitles'] == 'true'):
        closedcaption = tree.find('param', attrs={'name': 'ClosedCaptionURL'})
        if (closedcaption is not None):
            xml_closedcaption = common.getURL(closedcaption['value'])
            convert_subtitles(xml_closedcaption, pid)
    rtmpbase = tree.find('meta')
    if rtmpbase:
        rtmpbase = rtmpbase['base']
    items = tree.find('switch').findAll('video')
    hbitrate = -1
    sbitrate = int(common.settings['quality']) * 1024
    for item in items:
        bitrate = int(item['system-bitrate'])
        if bitrate > hbitrate and bitrate <= sbitrate:
            hbitrate = bitrate
            playpath = item['src']
            if '.mp4' in playpath:
                playpath = 'mp4:' + playpath
            else:
                playpath = playpath.replace('.flv', '')
            finalurl = rtmpbase + ' playpath=' + playpath + " swfurl=" + swfUrl + " swfvfy=true"
    item = xbmcgui.ListItem(path=finalurl)
    xbmcplugin.setResolvedUrl(pluginhandle, True, item)
    if (common.settings['enablesubtitles'] == 'true') and (closedcaption is not None):
        while not xbmc.Player().isPlaying():
            print 'CBS--> Not Playing'
            xbmc.sleep(100)
        subtitles = os.path.join(common.pluginpath, 'resources', 'cache', pid + '.srt')
        print "CBS --> Setting subtitles"
        xbmc.Player().setSubtitles(subtitles)

def main():
    """ """
    # process options
    usage = "Usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option("-d", "--dry-run", dest="dry_run",
                      help="don't actually write the output file",
                      action="store_true", default=False)
    (options, args) = parser.parse_args()

    # parse the xml file with beautifulsoup
    with open(INPUT_PATH) as fh:
        soup = BeautifulStoneSoup(fh.read(), selfClosingTags=['contact'])

    # purge read-only (i.e. LDAP) users from the XML soup
    for user in soup.findAll('user', attrs={'read-only': 'true'}):
        user.extract()

    # connect to ldap, get list of users
    con = bind(USERNAME, PASSWORD, HOST, DOMAIN)
    if globals().has_key('MEMBER_GROUPS'):
        memberships = []
        for group in MEMBER_GROUPS:
            memberships.append("(memberOf=cn=%s,%s)" % (group, BASE_DN))
        users = search(con, BASE_DN, searchFilter=u'(|%s)' % "".join(memberships))
    else:
        users = search(con, BASE_DN)

    # add user tag for each user in list to the XML soup
    for user in users:
        dn, attrs = user
        add_user_tag(soup, attrs)

    # update timestamp
    soup.find('created').string.replaceWith(
        time.strftime("%A, %d %B %Y %H:%M:%S o'clock %Z"))

    # BeautifulSoup / SGMLParser will lowercase 'serviceProvider' because all attributes in XML
    # should be lowercase. But OpenNMS is case-sensitive, so we force it back to being broken
    out = re.sub("serviceprovider", "serviceProvider", soup.prettify())

    # write file or show what we would've written
    if not options.dry_run:
        with open(OUTPUT_PATH, 'w') as fh:
            fh.write(out)
    else:
        print "Generated:\n"
        print out
    return 0

def get(self, agency, line, direction):
    directions = get_xml('http://webservices.nextbus.com/service/publicXMLFeed?command=routeConfig&a='
                         + agency + '&r=' + line)
    soup = BeautifulStoneSoup(directions, selfClosingTags=['stop'])
    stop_ids = soup.find('direction', tag=direction).findAll('stop')
    html = '<?xml version="1.0" encoding="utf-8" ?><body>'
    for stop_id in stop_ids:
        stop = soup.find('stop', tag=stop_id['tag'])
        html += '<choice tag="' + stop['tag'] + '" title="' + stop['title'].replace("&", "and") + '">'
    html += '</body>'
    self.response.out.write(html)

def play(url=common.args.url):
    videoname = url.split('/')[-2]
    smil = 'http://video.nationalgeographic.com/video/player/data/xml/%s.smil' % videoname
    data = common.getURL(smil)
    tree = BeautifulStoneSoup(data, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    base = tree.find('meta', attrs={'name': 'httpBase'})['content']
    filepath = tree.find('video')['src']
    final = base + filepath + '?v=1.2.17&fp=MAC%2011,1,102,62' + '&r=' + randomstring(5) + '&g=' + randomstring(12)
    item = xbmcgui.ListItem(path=final)
    xbmcplugin.setResolvedUrl(pluginhandle, True, item)

def _getCourseListing(self):
    xml = urllib2.urlopen(ebRss)
    soup = BeautifulStoneSoup(xml)
    tags = soup.findAll('link')
    eids = []
    courses = {}
    global venue
    for tag in tags:
        match = re.search(r"(event/)(\d+)(/rss)", str(tag))
        if match:
            print "Found EventBrite ID %s : %s" % (match.group(2), str(tag))
            eids.append(match.group(2))
    for eid in eids:
        print "Querying EventBrite API for %s" % (eid)
        xml = urllib2.urlopen('https://www.eventbrite.com/xml/event_get?app_key=%s&id=%s'
                              % (appkey, eid))
        soup = BeautifulStoneSoup(xml)
        startdate = self._fixText(soup.find('start_date'))
        enddate = self._fixText(soup.find('end_date'))
        title = self._fixText(soup.find('title'))
        #desc = self._fixText(soup.find('description'))
        if not venue:
            venueXML = soup.find('venue')
            name = str(venueXML.find('name'))
            address = str(venueXML.find('address'))
            address2 = str(venueXML.find('address_2'))
            city = str(venueXML.find('city'))
            region = str(venueXML.find('region'))
            zip = str(venueXML.find('postal_code'))
            list = [name, address, address2, city, region]
            venue = self._fixText(", ".join(list) + " " + zip)
            print "Setting Venue: " + venue
        urls = soup.findAll('url')
        url = ""
        for addr in urls:
            m = re.search(r"\d+", str(addr))
            if m:
                url = self._fixText(addr)
        startdate = time.gmtime(time.mktime(time.strptime(startdate, "%Y-%m-%d %H:%M:%S")))
        enddate = time.gmtime(time.mktime(time.strptime(enddate, "%Y-%m-%d %H:%M:%S")))
        desc = '<a href="%s">Click Here</a> for more info.' % (url)
        thisCourse = {'title': title, 'desc': desc, 'startdate': startdate,
                      'enddate': enddate, 'url': url}
        courses[eid] = thisCourse
    return courses

def login(self):
    """ Read greeting """
    greeting = self.read()
    soup = BeautifulStoneSoup(greeting)
    svid = soup.find('svid')
    version = soup.find('version')
    print("Connected to %s (v%s)\n" % (svid.text, version.text))

    """ Login """
    xml = commands.login % self.config
    if not self.cmd(xml, silent=True):
        exit(1)

def read(self, xml, identifier):
    """
    Load a JATS/NLM (PubMed) XML into a SciDoc.

    :param xml: full xml string
    :type xml: basestring
    :param identifier: an identifier for this document, e.g. file name
        If an actual full path, the path will be removed from it when stored
    :type identifier: basestring
    :returns: :class:`SciDoc <SciDoc>` object
    :rtype: SciDoc
    """
    # this solves a "bug" in BeautifulStoneSoup with "sec" tags
    BeautifulStoneSoup.NESTABLE_TAGS["sec"] = []

    #xml=fixNumberCitationsXML(xml)
    soup = BeautifulStoneSoup(xml)

    # Create a new SciDoc to store the paper
    newDocument = SciDoc()
    metadata = newDocument["metadata"]
    metadata["filename"] = os.path.basename(identifier)
    metadata["original_citation_style"] = detectCitationStyle(xml)

    body = soup.find("body")
    if not body:
        # TODO: Make the error handling less terrible
        debugAddMessage(newDocument, "error",
                        "NO <BODY> IN THIS PAPER! file: " + identifier)
        newDocument["metadata"]["guid"] = cp.Corpus.generateGUID()
        return newDocument

    # Load metadata, either from corpus or from file
    self.loadJATSMetadataFromPaper(newDocument, soup)
    metadata["guid"] = cp.Corpus.generateGUID(metadata)

    # Load all references from the XML
    back = soup.find("back")
    if back:
        ref_list = back.find("ref-list")
        # other things in <back> like appendices: ignore them for now
        if ref_list:
            for ref in ref_list.findAll("ref"):
                self.loadJATSReference(ref, newDocument)

    newDocument.updateReferences()

    # Load Abstract
    self.loadJATSAbstract(soup, newDocument)

    for sec in body.findChildren("sec", recursive=False):
        self.loadJATSSection(sec, newDocument, "root")

    newDocument.updateAuthorsAffiliations()
    return newDocument

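A short sketch of what the NESTABLE_TAGS tweak above changes, assuming BeautifulSoup 3's nesting rules: by default BeautifulStoneSoup treats a repeated tag as non-nestable, so an inner <sec> would implicitly close the outer one; registering 'sec' with an empty list lets JATS sections nest inside themselves.

from BeautifulSoup import BeautifulStoneSoup

nested = '<sec><title>A</title><sec><title>A.1</title></sec></sec>'
BeautifulStoneSoup.NESTABLE_TAGS['sec'] = []
soup = BeautifulStoneSoup(nested)
# With the tweak the inner section stays inside the outer one:
print len(soup.find('sec').findAll('sec'))  # 1 (0 without the tweak)
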
def login(self):
    """ Read greeting """
    greeting = self.read()
    soup = BeautifulStoneSoup(greeting)
    svid = soup.find('svid')
    version = soup.find('version')
    print("Connected to %s (v%s)\n" % (svid.text, version.text))

    """ Login """
    xml = commands.login % self.config
    if not self.cmd(xml):
        raise Exception('Error: Unable to login')

def auth(self):
    response = self.get_ticket_granting_ticket(self.USER, self.PASS)
    html = BeautifulSoup(response)
    tgt = html.body.form["action"]
    st = self.get_service_ticket(tgt)
    vld = self.validate_service(st)
    xml = BeautifulStoneSoup(vld)
    iou = xml.find('cas:proxygrantingticket').string \
        if xml.find('cas:proxygrantingticket') else None
    self.PGT = self.get_proxy_granting_ticket(iou)
    return self.PGT

def play(url=common.args.url):
    print "DEBUG Entering play function"
    swfUrl = 'http://can.cbs.com/thunder/player/chrome/canplayer.swf'
    if 'http://' in url:
        data = common.getURL(url)
        try:
            pid = re.compile('video.settings.pid = "(.*?)";').findall(data)[0]
        except:
            pid = re.compile("video.settings.pid = '(.*?)';").findall(data)[0]
    else:
        pid = url
    # OLD URL
    #url = "http://release.theplatform.com/content.select?format=SMIL&Tracking=true&balance=true&MBR=true&pid=" + pid
    url = "http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&Tracking=true&mbr=true" % pid
    if (common.settings['enableproxy'] == 'true'):
        proxy = True
    else:
        proxy = False
    data = common.getURL(url, proxy=proxy)
    tree = BeautifulStoneSoup(data, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    if (common.settings['enablesubtitles'] == 'true'):
        closedcaption = tree.find('param', attrs={'name': 'ClosedCaptionURL'})
        if (closedcaption is not None):
            xml_closedcaption = common.getURL(closedcaption['value'])
            convert_subtitles(xml_closedcaption, pid)
    rtmpbase = tree.find('meta')
    if rtmpbase:
        rtmpbase = rtmpbase['base']
    items = tree.find('switch').findAll('video')
    hbitrate = -1
    sbitrate = int(common.settings['quality']) * 1024
    for item in items:
        bitrate = int(item['system-bitrate'])
        if bitrate > hbitrate and bitrate <= sbitrate:
            hbitrate = bitrate
            playpath = item['src']
            if '.mp4' in playpath:
                playpath = 'mp4:' + playpath
            else:
                playpath = playpath.replace('.flv', '')
            finalurl = rtmpbase + ' playpath=' + playpath + " swfurl=" + swfUrl + " swfvfy=true"
    item = xbmcgui.ListItem(path=finalurl)
    xbmcplugin.setResolvedUrl(pluginhandle, True, item)
    if (common.settings['enablesubtitles'] == 'true') and (closedcaption is not None):
        while not xbmc.Player().isPlaying():
            print 'CBS--> Not Playing'
            xbmc.sleep(100)
        subtitles = os.path.join(common.pluginpath, 'resources', 'cache', pid + '.srt')
        print "CBS --> Setting subtitles"
        xbmc.Player().setSubtitles(subtitles)

def RESOLVE(index):  #, info
    doc = tools.gethtmlpage("%s/playlist/null/%s" % (ziln_urls["ZILN"], index))
    if doc:
        soup = BeautifulStoneSoup(doc)
        #tools.message(soup.find('media:content')["url"])
        #minidom.parseString(doc).documentElement.getElementsByTagName("media:content")[0].attributes["url"].value
        info = tools.defaultinfo(0)
        info["Title"] = soup.find('item').title.contents[0]
        info["Thumb"] = soup.find('jwplayer:image').contents[0]
        info["Plot"] = soup.find('description').contents[0]
        uri = "%s%s" % (ziln_urls["ZILN"], soup.find('media:content')["url"])
        tools.addlistitem(int(sys.argv[1]), info, ziln_urls["Fanart"], 0, 1, uri)

def _extract_pmid_links_from_xml(raw_xml):
    try:
        soup = BeautifulStoneSoup(raw_xml)
        # Verify we got a valid page
        assert(soup.find("elinkresult"))
        # Now get the linkset part
        linksetdb_soup = BeautifulStoneSoup(
            str(soup.find(text="pmc_pubmed").findParents('linksetdb'))[1:-1])
        pmids = list_of_tag_contents(linksetdb_soup, "id")
    except AttributeError:
        # No links found
        pmids = []
    return(pmids)

def update(self, prefix=""): #print "Updating "+str(self.number) if self.station_url == "": self.station_url = STATION_URL % self.city usock = urllib.urlopen(prefix + self.main_url + self.station_url + str(self.number)) xml_data = usock.read() usock.close() soup = BeautifulStoneSoup(xml_data) self.bikes = int(soup.find('available').contents[0]) self.free = int(soup.find('free').contents[0]) self.timestamp = datetime.now() return self
def get_menu(url=FEED_URL, tomorrow=False, die_on_closed=False):
    """
    Builds a Sharples menu from ``url``, returning a dictionary like this:

    {
        'closed': False,
        'message': "",
        'lunch': "beef vegetable soup, potato leek, ...",
        'dinner': "flank steak, baked stuffed potatoes, ..."
    }

    Note that we still return the menu if Sharples is closed, unless
    ``die_on_closed`` is set. If ``tomorrow`` is set, tries to figure out
    the menu for tomorrow.
    """
    try:
        page = urllib2.urlopen(url)
    except urllib2.URLError:
        # TODO: log this error somehow
        message = "Sorry, it seems we're having some technical difficulties " \
                  "with figuring out the Sharples menu. Try checking the " \
                  "Dashboard or the Sharples website."
        return {'closed': True, 'message': message}

    feed = BeautifulStoneSoup(page, selfClosingTags=['closed'])
    data = {}
    data['closed'] = feed.closed['value'] == "1"
    data['message'] = feed.message.string or ""
    if data['closed'] and die_on_closed:
        return data

    week = feed.find("week", {'currentwk': '1'})
    if tomorrow:
        day_name = (date.today() + timedelta(days=1)).strftime("%A")
        if day_name == "Saturday":
            num = int(week['value']) + 1
            if num > NUM_WEEKS:
                num = 1
            week = feed.find("week", {'value': str(num)})
    else:
        day_name = date.today().strftime("%A")

    for item in week.find("day", {'value': day_name}).findAll("item"):
        meal = item.meal.string.strip()
        if meal:
            data[meal.lower()] = br.sub("<br />", item.menu.string.strip())
    return data

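A usage sketch for get_menu; the fields printed below come from the dictionary shape documented in the docstring, and the fallback strings are illustrative.

menu = get_menu(tomorrow=True)
if menu['closed']:
    print menu['message']
else:
    print menu.get('lunch', 'no lunch listed')
    print menu.get('dinner', 'no dinner listed')
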
def parse(cls_, file_handle):
    ofx_obj = Ofx()
    ofx = BeautifulStoneSoup(file_handle)
    stmtrs_ofx = ofx.find('stmtrs')
    if stmtrs_ofx:
        ofx_obj.bank_account = cls_.parseStmtrs(stmtrs_ofx)
    else:
        # westpac has "CCSTMTRS"
        stmtrs_ofx = ofx.find('ccstmtrs')
        if stmtrs_ofx:
            ofx_obj.bank_account = cls_.parseStmtrs(stmtrs_ofx)
    return ofx_obj

def GET_RTMP(vid):
    #url = 'http://www.tbs.com/video/cvp/videoData.jsp?oid=' + vid
    url = 'http://www.tbs.com/tveverywhere/content/services/cvpXML.do?titleId=' + vid
    html = common.getURL(url)
    tree = BeautifulStoneSoup(html, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    #print tree.prettify()
    files = tree.findAll('file')
    if not files:
        url = 'http://www.tbs.com/tveverywhere/content/services/cvpXML.do?titleId=&id=' + vid
        html = common.getURL(url)
        tree = BeautifulStoneSoup(html, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
        #print tree.prettify()
        files = tree.findAll('file')
    if files:
        html = common.getURL(url)
        tree = BeautifulStoneSoup(html, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
        print tree.prettify()
        sbitrate = int(common.settings['quality'])
        hbitrate = -1
        files = tree.findAll('file')
        for filenames in files:
            try:
                bitrate = int(filenames['bitrate'])
            except:
                bitrate = 1
            if bitrate > hbitrate and bitrate <= sbitrate:
                hbitrate = bitrate
                filename = filenames.string
        serverDetails = tree.find('akamai')
        if serverDetails:
            filename = filename[1:len(filename) - 4]
            serverDetails = tree.find('akamai')
            server = serverDetails.find('src').string.split('://')[1]
            # get auth
            tokentype = serverDetails.find('authtokentype').string
            window = serverDetails.find('window').string
            aifp = serverDetails.find('aifp').string
            auth = getAUTH(aifp, window, tokentype, vid, filename.replace('mp4:', ''))
            swfUrl = 'http://www.tbs.com/cvp/tbs_video.swf'
            link = 'rtmpe://' + server + '?' + auth + " swfurl=" + swfUrl + " swfvfy=true" + ' playpath=' + filename
        elif 'http://' in filename:
            link = filename
        else:
            link = 'http://ht.cdn.turner.com/tbs/big' + filename
        return link

def parse_auth(soup, iview_config):
    """ There are lots of goodies in the auth handshake we get back,
    but the only ones we are interested in are the RTMP URL, the auth
    token, and whether the connection is unmetered.
    """
    xml = BeautifulStoneSoup(soup)

    # should look like "rtmp://203.18.195.10/ondemand"
    try:
        rtmp_url = xml.find('server').string

        # at time of writing, either 'Akamai' (usually metered) or 'Hostworks' (usually unmetered)
        stream_host = xml.find('host').string

        playpath_prefix = ''
        if stream_host == 'Akamai':
            playpath_prefix = config.akamai_playpath_prefix

        if rtmp_url is not None:
            # Being directed to a custom streaming server (i.e. for unmetered services).
            # Currently this includes Hostworks for all unmetered ISPs except iiNet.
            rtmp_chunks = rtmp_url.split('/')
            rtmp_host = rtmp_chunks[2]
            rtmp_app = rtmp_chunks[3]
        else:
            # We are a bland generic ISP using Akamai, or we are iiNet.
            rtmp_url = iview_config['rtmp_url']
            rtmp_host = iview_config['rtmp_host']
            rtmp_app = iview_config['rtmp_app']

        token = xml.find("token").string
        token = token.replace('&amp;', '&')  # work around BeautifulSoup bug
    except:
        d = xbmcgui.Dialog()
        d.ok('iView Error', 'There was an iView handshake error.', 'Please try again later')
        return None

    return {
        'rtmp_url': rtmp_url,
        'rtmp_host': rtmp_host,
        'rtmp_app': rtmp_app,
        'playpath_prefix': playpath_prefix,
        'token': token,
        'free': (xml.find("free").string == "yes")
    }

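A small sketch of the entity workaround above, assuming BeautifulSoup 3 behavior: BeautifulStoneSoup leaves character entities in text untouched unless a convertEntities argument is passed, so a token containing '&amp;' has to be unescaped by hand.

from BeautifulSoup import BeautifulStoneSoup

demo = BeautifulStoneSoup('<token>abc&amp;def</token>')
print demo.find('token').string                         # u'abc&amp;def'
print demo.find('token').string.replace('&amp;', '&')   # u'abc&def'
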
def PLAYVIDEO():
    #common.login()
    #orgin = 'http://dish.epixhd.com/epx/ajax/user/originstatus/'
    #print common.getURL(orgin,useCookie=True)
    #pageurl = 'http://www.epixhd.com/epx/ajax/theater/soloplayer' + common.args.url
    #print common.getURL(pageurl,useCookie=True)
    smilurl = 'http://www.epixhd.com/epx/smil' + common.args.url + 'smil.xml'
    data = common.getURL(smilurl, useCookie=True)
    authurl = 'http://www.epixhd.com/epx/ajax/theater/getToken/?movie=' + common.args.url.strip('/')
    auth = common.getURL(authurl, useCookie=True)
    tree = BeautifulStoneSoup(data, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    print tree.prettify()
    stackedUrl = 'stack://'
    if common.addon.getSetting("enablepreroll") == 'true':
        for preroll in tree.find('img').findAll('video', recursive=False):
            stackedUrl += buildrtmp(preroll['src'], auth).replace(',', ',,') + ' , '
    quality = [0, 3000000, 2200000, 1700000, 1200000, 900000, 500000]
    lbitrate = quality[int(common.addon.getSetting("bitrate"))]
    mbitrate = 0
    streams = []
    movie_name = tree.find('mbrstream')['ma:asset_name']
    common.args.asset_id = tree.find('mbrstream')['ma:asset_id']
    for item in tree.find('mbrstream').findAll('video'):
        url = item['src']
        bitrate = int(item['system-bitrate'])
        if lbitrate == 0:
            streams.append([bitrate / 1000, url])
        elif bitrate >= mbitrate and bitrate <= lbitrate:
            mbitrate = bitrate
            rtmpdata = url
    if lbitrate == 0:
        quality = xbmcgui.Dialog().select('Please select a quality level:',
                                          [str(stream[0]) + 'kbps' for stream in streams])
        if quality != -1:
            rtmpdata = streams[quality][1]
        else:
            return
    stackedUrl += buildrtmp(rtmpdata, auth).replace(',', ',,')
    #p = ResumePlayer()
    item = xbmcgui.ListItem(path=stackedUrl)
    #item.setInfo(type="Video", infoLabels={"Title": movie_name})
    xbmcplugin.setResolvedUrl(pluginhandle, True, item)
    #while not p.isPlaying():
    #    print 'EPIX --> Not Playing'
    #    xbmc.sleep(100)
    #p.onPlayBackStarted()

def find_series_id(name):
    """Looks up the tvdb id for a series"""
    url = server + 'GetSeries.php?seriesname=%s&language=%s' % (
        urllib.quote(name), language)
    try:
        page = requests.get(url).content
    except RequestException as e:
        raise LookupError("Unable to get search results for %s: %s" % (name, e))
    xmldata = BeautifulStoneSoup(
        page, convertEntities=BeautifulStoneSoup.HTML_ENTITIES).data
    if not xmldata:
        log.error("Didn't get a return from tvdb on the series search for %s" % name)
        return
    # See if there is an exact match
    # TODO: Check if there are multiple exact matches
    firstmatch = xmldata.find('series')
    if firstmatch and firstmatch.seriesname.string.lower() == name.lower():
        return int(firstmatch.seriesid.string)
    # If there is no exact match, sort by airing date and pick the latest
    # TODO: Is there a better way to do this? Maybe weight name similarity and air date
    series_list = [(s.firstaired.string, s.seriesid.string)
                   for s in xmldata.findAll('series', recursive=False) if s.firstaired]
    if series_list:
        series_list.sort(key=lambda s: s[0], reverse=True)
        return int(series_list[0][1])
    else:
        raise LookupError('No results for `%s`' % name)

def parse_netbios(file_to_read):
    parsed = BeautifulStoneSoup(file(file_to_read).read())
    adapters = parsed.findAll('adapter')
    if adapters:
        call_name = parsed.find('callname').string
        if call_name[0].isdigit():
            ip_address = unicode(call_name.strip())
        else:
            ip_address = None
        netbios_list = []
        for adapter in adapters:
            mac_address = adapter['adapter_addr'].replace('.', ':').strip()
            names_list = adapter.findAll('names')
            host_name = None
            domain_name = None
            for names_elements in names_list:
                type = names_elements.find('type')
                name = names_elements.find('name')
                if (type.string == 'Workstation Service'):
                    host_name = unicode(name.string.strip()).lower()
                elif (type.string == 'Domain Name'):
                    domain_name = unicode(name.string.strip()).lower()
            netbios_list += [{
                'host_name': host_name,
                'domain_name': domain_name,
                'ip_address': ip_address,
                'mac_address': mac_address
            }]
        return netbios_list

class Extract():
    def __init__(self, xml, filename, game_name, away_team, home_team):
        self.xml = xml
        self.game_name = game_name
        self.filename = filename
        self.soup = BeautifulStoneSoup(self.xml)
        self.home_team = home_team
        self.away_team = away_team

    def extract(self):
        plays = self.splitRowsIntoPlays()
        row_indexes = self.getPeriodIndexes()
        indexed_plays = self.combinePlaysWithPeriodIndexes(row_indexes, plays)
        self.dumpToFile(indexed_plays)

    def getGameData(self):
        gamedata = self.soup.find("game")
        print gamedata.attrs

    def getPlayByPlayData(self):
        playbyplaydata = self.soup.findAll("event")
        for play in playbyplaydata:
            print dict(play.attrs)

    def dumpToFile(self, list_data):
        writer = csv.writer(open(LOGDIR_EXTRACT + self.filename + '_pbp_nbacom', 'wb'),
                            delimiter=',', lineterminator='\n')
        writer.writerows(list_data)

def __init__(self, xml_path, deftype):
    self.type = deftype
    xml_file = open(xml_path)
    xml = xml_file.read()
    xml = xml.replace('<computeroutput>', '`').replace('</computeroutput>', '`')
    # TODO(josh11b): Should not use HTML entities inside ```...```.
    soup = BeautifulStoneSoup(xml, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
    self.name = soup.find('compoundname').text
    print('Making page with name ' + self.name + ' (from ' + xml_path + ')')
    members = soup('memberdef', prot='public')
    briefs = all_briefs(members)
    fulls = all_fulls(members)
    self.overview = page_overview(soup.find('compounddef'))
    self.page_text = PAGE_TEMPLATE.format(self.type, self.name, self.overview, fulls)