def get_onepage_poclist(page):
    """Fetch one listing page of http://beebeeto.com/pdb and return PoC links.

    Args:
        page: 1-based page number of the pdb listing.

    Returns:
        A list of href strings containing "poc-", or the original sentinel
        '' / "" when the page is empty, unreachable, or past the last page.
    """
    info = getHtml("http://beebeeto.com/pdb" + '/?page=' + str(page))
    if '' == info:
        return ''
    bt = BeautifulSoup(info)
    end = bt.find('a', {'style': "font-size: 20px;font-weight: bold; border-bottom: 3px solid #777777;"})
    # Bug fix: bt.find() returns None when the styled pager anchor is absent;
    # the original crashed on end.renderContents(). Treat that as "no data".
    if end is None:
        return ''
    # The pager highlights "1" once we have paged past the last real page.
    if '1' == end.renderContents() and page != 1:
        return ''
    ret = bt.find('div', {'class': 'mainlist'})
    # Bug fix: same None-dereference hazard for the main listing container.
    if ret is None:
        return ""
    ret = ret.renderContents()
    if ret == "":
        return ""
    # Collect anchors that point at PoC entries and strip the tag syntax.
    retlist = []
    for one in re.findall('<a href=.*?>', ret):
        if "poc-" in one:
            retlist.append(one.replace('<a href="', "").replace('">', "").strip())
    return retlist
def get_onepage_poclist(page):
    """Return the list of "poc-" hrefs found on one pdb listing page.

    NOTE(review): this duplicates an identical definition earlier in the
    file; only the last one bound wins at import time — consider removing one.

    Args:
        page: 1-based listing page number.

    Returns:
        List of href strings, or '' / "" (original sentinels) on failure,
        empty page, or when paging past the end.
    """
    info = getHtml("http://beebeeto.com/pdb" + '/?page=' + str(page))
    if '' == info:
        return ''
    bt = BeautifulSoup(info)
    end = bt.find(
        'a', {
            'style': "font-size: 20px;font-weight: bold; border-bottom: 3px solid #777777;"
        })
    # Bug fix: guard against find() returning None before calling
    # renderContents(); the original raised AttributeError on odd HTML.
    if end is None:
        return ''
    # The active pager link reads "1" once we are past the last page.
    if '1' == end.renderContents() and page != 1:
        return ''
    ret = bt.find('div', {'class': 'mainlist'})
    if ret is None:  # Bug fix: missing listing container -> no results.
        return ""
    ret = ret.renderContents()
    if ret == "":
        return ""
    retlist = []
    # Keep only PoC anchors; peel off the surrounding tag markup.
    for one in re.findall('<a href=.*?>', ret):
        if "poc-" in one:
            one = one.replace('<a href="', "")
            one = one.replace('">', "")
            retlist.append(one.strip())
    return retlist
def parse_locations_from_preferences_body(self, response_body):
    """Extract the driver's location profiles from the preferences page HTML.

    Args:
        response_body: raw HTML of the preferences page.

    Returns:
        A list of LocationProfile objects; the one whose radio button is
        pre-checked has is_default set to True.

    Raises:
        ScreenscrapeParseError: when the expected profiles <tbody> is missing.
    """
    doc = BeautifulSoup(response_body)
    body = doc.find('tbody', {'id': 'dpref_driver_pk__preferences_pk__driver_locations_pk__profiles'})
    if body is None:
        raise ScreenscrapeParseError('No tbody found: %r' % response_body)
    profiles = []
    for row in body.findAll('tr'):
        name_cell = row.findAll('td', {'class': 'profile_name'})[0]
        desc_cell = row.findAll('td', {'class': 'profile_descr'})[0]
        radio = row.findAll('input', {'class': 'profile_default'})[0]
        profile = LocationProfile(name_cell.text, radio['value'], desc_cell.text)
        # The pre-selected radio marks the account's default profile.
        profile.is_default = (radio.get('checked', None) == 'checked')
        profiles.append(profile)
    return profiles
def getPoc(poc):
    """Download the PoC source for *poc* from beebeeto.com/pdb.

    Args:
        poc: PoC identifier used as the URL path segment (e.g. "poc-...").

    Returns:
        The PoC code with HTML tags stripped, or '' when the page is empty,
        image-only, paywalled, or parsing fails.
    """
    info = getHtml("http://beebeeto.com/pdb/" + poc + "/")
    if '' == info:
        return ''
    # A placeholder image means the PoC body was not published as text.
    if '<img src="/static/img/test.jpg"' in info:
        return ''
    # "立即查看" ("view now") marks entries with no inline code available.
    if "立即查看" in info:
        return ''
    try:
        bt = BeautifulSoup(info)
        ret = bt.find('pre', {'class': "brush: python;"})
        ret = ret.renderContents()
        if ret:
            return strip_tags(ret)
        else:
            return ''
    # Bug fix: the original bare `except:` also swallowed SystemExit and
    # KeyboardInterrupt; catch only ordinary errors (e.g. None.renderContents
    # when the <pre> block is missing).
    except Exception:
        return ''
def getPoc(poc):
    """Fetch and return the stripped PoC source for *poc*, or '' on failure.

    NOTE(review): duplicates an identical getPoc defined earlier in the file;
    only the last binding survives — consider deleting one copy.

    Args:
        poc: PoC identifier appended to http://beebeeto.com/pdb/.

    Returns:
        Tag-stripped PoC text, or '' for empty/image-only/paywalled pages
        and on any parsing error.
    """
    info = getHtml("http://beebeeto.com/pdb/" + poc + "/")
    if '' == info:
        return ''
    # Placeholder image: no textual PoC was published.
    if '<img src="/static/img/test.jpg"' in info:
        return ''
    # "立即查看" ("view now") indicates a paywalled entry.
    if "立即查看" in info:
        return ''
    try:
        bt = BeautifulSoup(info)
        ret = bt.find('pre', {'class': "brush: python;"})
        ret = ret.renderContents()
        if ret:
            return strip_tags(ret)
        else:
            return ''
    # Bug fix: replace the bare `except:` (which also caught SystemExit and
    # KeyboardInterrupt) with the narrower Exception.
    except Exception:
        return ''
def getstatus(code, count=None):
    """Scrape tracking events for parcel *code* and format them as text lines.

    Args:
        code: tracking code interpolated into the module-level `baseurl`.
        count: optional cap on the number of events returned.

    Returns:
        A list of "AGE ago - STATUS - LOCATION" strings, in page order.
    """
    url = baseurl % code
    f = urllib2.urlopen(url)
    try:
        d = f.read()
    finally:
        # Bug fix: the original leaked the connection when read() raised;
        # close unconditionally.
        f.close()
    bs = BeautifulSoup(d)
    res = []
    statuslist = (
        bs.find("div", {"class": "result_up"})
        .find("table", {"width": "500"})
        .findAll("p", {"class": "resulttext"})
    )
    for status in statuslist:
        # Each result paragraph holds exactly three children:
        # date text, status tag, location text.
        date, statustext, location = status.contents
        statustext = statustext.string
        # Finnish timestamp format ("klo" = "at"), e.g. "31.12.2014, klo 23:59 ".
        date = time.strptime(date, "%d.%m.%Y, klo %H:%M ")
        # Drop the fixed 6-character label prefix before the location text
        # — TODO(review): confirm the label the page actually emits.
        location = location[6:].strip()
        dt = datetime.datetime(*date[0:6])
        age = datetime.datetime.now() - dt
        # Build a compact age string such as "2d 3h 14m".
        agestr = []
        if age.days > 0:
            agestr.append("%dd" % age.days)
        secs = age.seconds
        # Unused `seconds` local removed; output never showed seconds.
        hours, minutes = secs // 3600, secs // 60 % 60
        if hours > 0:
            agestr.append("%dh" % hours)
        if minutes > 0:
            agestr.append("%dm" % minutes)
        res.append("%s - %s - %s" % (" ".join(agestr) + " ago", statustext, location))
    if count:
        return res[:count]
    else:
        return res
def buildStoryFromString(data, stories):
    """Look up *data* in *stories*; if absent, build a Story from a Google search.

    Scrapes the first plausible external result link from a Google search for
    *data*, builds a Story entity from it, and persists it via story.put().
    Returns None (the dead code in the trailing string suggests a return was
    once intended — TODO(review): confirm callers do not expect a value).
    """
    story=findStory(data, stories)
    if not story:
        url="http://www.google.com/search?q="+data.replace(' ', '+')
        logging.info(url)
        try:
            raw_data = http.getHttp(url)
            soup = BeautifulSoup(raw_data)
            story=None
            # Old-BeautifulSoup positional attrs: tag.attrs[0] is the first
            # (name, value) pair; this accepts anchors whose FIRST attribute
            # is an absolute, non-Google href — a result link rather than
            # Google chrome.
            a=soup.find(lambda tag: tag.name=='a' and tag.attrs[0][0]=='href' and not tag.attrs[0][1].startswith('/') and not 'google' in tag.attrs[0][1])
            if a and a.text:
                story=Story()
                story.deleteFlag=False
                story.mykey=data
                story.title=''
                # Flatten mixed Tag/NavigableString children into plain text.
                for c in a.contents:
                    if type(c) == Tag:
                        story.title+=c.text
                    else:
                        story.title+=c
                story.link=a.attrs[0][1]
                story.text=''
                # contents[4] is presumably the result-snippet node in
                # Google's markup of the time — TODO(review): fragile index.
                for c in a.parent.contents[4].contents:
                    if type(c) == Tag:
                        story.text+=c.text
                    else:
                        story.text+=c
                story.put()
        except DownloadError: #@UndefinedVariable
            logging.error(url + ' failed to load')
    '''
    scraper=SearchScraper()
    scraper.feed(raw_data)
    return scraper.story
    '''
# NOTE(review): fragment of a larger function — `url`, `payload`, `keywords`,
# `request`, `new_keyword`, and `fetch` are defined outside this view.
# Look up the URL with its trailing character stripped (presumably a
# trailing slash — TODO confirm); if known, serve metadata from the DB.
dburl = TallstreetUrls.get_url(url[0:-1])
if dburl:
    url = url[0:-1]
if dburl:
    # Known URL: populate the payload straight from stored metadata.
    payload["url"] = dburl.url
    payload["title"] = dburl.title
    payload["description"] = dburl.description
    payload["new"] = False
    # Tag weight scales with invested money, clamped to the range [10, 30].
    for keyword in dburl.related_keywords:
        payload["tags"][keyword.tag.tag] = min(keyword.money / 1000 + 10, 30)
else:
    # Unknown URL: fetch the page and scrape title/description from the HTML.
    page = fetch(url)
    soup = BeautifulSoup(page.content)
    payload["title"] = soup.html.head.title.string
    desc = soup.find("meta", {"name": "description"})
    if desc:
        payload["description"] = desc["content"]
    payload["url"] = url
    payload["new"] = True
if keywords == []:
    # No keywords supplied: pre-fill from the user's existing investments,
    # removing each invested tag from the suggested-tags payload.
    invested = TallstreetPortfolio.get_keywords(request.user, dburl)
    for keyword in invested:
        if payload["tags"].has_key(keyword.keyword.tag):
            del payload["tags"][keyword.keyword.tag]
        # Clear new_keyword if it is already invested — presumably consumed
        # later in the enclosing function; TODO(review) confirm.
        if keyword.keyword.tag == new_keyword:
            new_keyword = ""
        keywords.append({"keyword": keyword.keyword.tag, "amount": keyword.money, 'edit': False})
        # Invested money is already committed, so subtract it from the
        # spendable balance shown to the user.
        payload["base_balance"] -= keyword.money