def scrap(folderBase, s):
    filePath = folderBase + '/' + 'yahoo_mb_' + s + '.txt'
    output = open(filePath, 'w')
    output.write('=========\n')
    output.write('Timestamp: ' + datetime.datetime.now().strftime(TIME_FORMAT) + '\n')
    output.write('=========\n')
    posters = driver.find_elements_by_xpath(xpath_poster)
    times = driver.find_elements_by_xpath(xpath_time)
    msgs = driver.find_elements_by_xpath(xpath_msg)
    print(len(posters), len(times), len(msgs))
    try:
        for i in range(len(msgs)):
            try:
                # keep the message body as a str so endswith()/write() work
                # against the text-mode file
                soup = BeautifulSoup(msgs[i].text, 'html.parser').get_text()
                poster = posters[i].text
                time = times[i].text
                if not checkTime(s, time):
                    break
                output.write(poster + ' @ ' + time + '\n')
                if soup.endswith('More'):
                    # drop the trailing 'More' link text
                    output.write(soup[:-4])
                else:
                    output.write(soup + '\n')
                output.write('---------\n')
            except Exception as ex:
                pass
    finally:
        output.close()
    return filePath
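# scrap() leans on module-level globals that are not shown here: a Selenium
# `driver` already pointed at the message board, the three XPath strings,
# TIME_FORMAT, and a checkTime() helper. A rough setup sketch follows; every
# value below is a placeholder, not the original project's configuration.
from selenium import webdriver

TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
xpath_poster = '//placeholder/xpath/to/poster-name'
xpath_time = '//placeholder/xpath/to/post-time'
xpath_msg = '//placeholder/xpath/to/message-body'

def checkTime(symbol, time_text):
    # assumed stub: decide whether a post is recent enough to keep scraping
    return True

driver = webdriver.Chrome()  # Selenium 3-style driver, matching find_elements_by_xpath above
driver.get('https://finance.yahoo.com/')   # placeholder board URL
filePath = scrap('/tmp/boards', 'AAPL')    # hypothetical output folder and ticker
driver.quit()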
def sendSMS(self, mobileno, text):
    if(self.notLogged):
        print("you are not logged in - call logIn()")
        return
    if(self.captchaNeeded):
        with open(self.captchaPath, "wb") as f:
            f.write(self.opener.open(self.captchaUrl).read())
        p = Popen(["display", self.captchaPath])
        self.dataDict['textcode'] = input("Captcha ? ")
        p.kill()
    if(len(text) <= 140):
        self.dataDict['mobNo'] = mobileno
        self.dataDict['text'] = text
        # print(self.postDataStr.format(**self.dataDict))
        try:
            h = self.opener.open(self.sendSMSUrl, self.postDataStr.format(**self.dataDict).encode())
            resp = h.read()
            try:
                msg = BS(resp).find("div", attrs={"id": "quicksms"}).find("div", attrs={"class": "quickname"}).text.strip()
                if msg.endswith("submitted successfully"):
                    pass
                else:
                    print("N : " + msg)
            except:
                print("N")
                self.captchaNeeded = True
                with open("successResp.html", "wb") as f:
                    f.write(resp)
        except urllib.error.HTTPError as error:
            pass
def clean_str(raw: Optional[str], strip_trailing_period: bool = False) -> Optional[str]:
    """
    Takes a str and "cleans" it. Intended to be usable with short strings
    (names, titles) in any language.

    See scrub_text(), which extends this function for paragraph length and
    longer text fields.
    """
    if not raw:
        return None
    text = ftfy.fix_text(raw)

    # remove HTML
    text = BeautifulSoup(text, "html.parser").get_text()

    # TODO: for performance, compile these as globals?
    # replaces whitespace with single space
    text = re.sub(r"\s+", " ", text).strip()

    # TODO: shouldn't HTML be parsing these out?
    text = text.replace("<em>", "").replace("</em>", "")

    text = text.strip()
    if strip_trailing_period and text.endswith("."):
        text = text[:-1]
    if text.lower() in UNWANTED_SHORT_STRINGS:
        return None
    if not text:
        return None
    return text
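# A small usage sketch for clean_str(); the expected values assume ftfy
# passes plain ASCII through unchanged and that UNWANTED_SHORT_STRINGS does
# not contain these sample titles.
assert clean_str(None) is None
assert clean_str("") is None
assert clean_str("A  <em>Nice</em>  Title.") == "A Nice Title."
assert clean_str("A  <em>Nice</em>  Title.", strip_trailing_period=True) == "A Nice Title"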
def scrape_brands():
    brand_json = json.load(urlopen(US_BRANDS_URL))

    for brand in brand_json['brands']:
        name_html = brand['name']['desktop']
        name = BeautifulSoup(name_html).text.strip()  # resolve HTML entities
        if name.endswith('*'):
            yield dict(brand=name[:-1], is_licensed=True)
        else:
            yield name
def get_progress(self, task_id):
    with AppAssureSession(self.server, self.port, self.username,
                          self.password) as session:
        try:
            events = Events(session).taskMonitor(task_id).text
            percent = BeautifulSoup(events).td.td.text
            if not percent.endswith('%'):
                percent = "100%"
            return int(percent[:-1])
        except AppAssureError as e:
            return e[1].text
        except (ValueError, AttributeError) as e:
            return str(e)
def scrape_company():
    yield 'company', {'company': COMPANY, 'url': COMPANY_URL}

    brand_json = json.load(urlopen(US_BRANDS_JSON_URL))

    for brand_dict in brand_json['brands']:
        brand = dict(company=COMPANY)

        name_html = brand_dict['name']['desktop']
        name = BeautifulSoup(name_html).text.strip()  # resolve HTML entities
        if name.endswith('*'):
            brand['brand'] = name[:-1]
            brand['is_licensed'] = True
        else:
            brand['brand'] = name

        if brand_dict['brand_url']:
            brand['url'] = urljoin(ALL_BRANDS_URL, brand_dict['brand_url'])

        yield 'brand', brand
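# A hypothetical consumer for scrape_company() above; the CSV path and the
# column layout are illustrative, not part of the original scraper.
import csv

def dump_brands(path='brands.csv'):
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['company', 'brand', 'is_licensed', 'url'])
        for kind, record in scrape_company():
            if kind == 'brand':
                writer.writerow([record.get('company'),
                                 record.get('brand'),
                                 record.get('is_licensed', False),
                                 record.get('url', '')])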
sjAnswers = [[0 for x in range(6)] for y in range(5)]
djAnswers = [[0 for x in range(6)] for y in range(5)]

row = 0
col = 0
for square in sjDivs:
    text = square.find('td', class_='clue_text')
    if text:
        sjClues[row][col] = text.text
        answerDiv = square.find('div')
        if answerDiv:
            answer = extract.search(str(answerDiv))
            pretty = BeautifulSoup(answer.group(1), 'html.parser').text
            if pretty.startswith('<i>'):
                pretty = pretty[3:]
            if pretty.endswith('</i>'):
                pretty = pretty[:-4]
            sjAnswers[row][col] = pretty
    col += 1
    if col == 6:
        col = 0
        row += 1

row = 0
col = 0
for square in djDivs:
    text = square.find('td', class_='clue_text')
    if text:
        djClues[row][col] = text.text
        answerDiv = square.find('div')
        if answerDiv:
def descargarCategoriaEspecifica22(URLLL, resultados):
    resultado = descargarResultado("/producto/" + URLLL, 360, 10)
    try:
        codigo = URLLL
    except:
        codigo = ''
    try:
        nombre = resultado.split('<h2 class="with-tabs">')[1].split('</h2>')[0].replace("\\t", '').strip()
    except:
        nombre = ''
    try:
        categoria = resultado.split('<b>Categor')[1].split('</div>')[0].split('</b>')[1].replace("\\t", '').replace("\\n", '').strip()
    except:
        categoria = ''
    try:
        costo = resultado.split('class="uc-price">')[2].split('<')[0].replace("\\t", '').strip()
    except:
        costo = ''
    try:
        fotos = 'http://www.radec.com.mx/sites/all/files/productos/' + codigo + '.jpg'
    except:
        fotos = ''
    val = 0
    nombre2 = nombre
    try:
        # each car_icon.gif marks one vehicle application block in the page
        for car in resultado.split("/sites/all/themes/radec/images/car_icon.gif"):
            marca = ''
            marca_auto = ''
            modelo = ''
            anio = ''
            notas = ''
            if (val == 0):
                # skip the header chunk before the first car icon
                val = 1
            else:
                try:
                    marca_auto = car.split('<')[0].split('>')[1].replace("\\t", '').split('\\n')[2].strip()
                except:
                    marca_auto = ''
                try:
                    marca = ''
                    if (' TYC ' in nombre):
                        marca = 'TYC'
                    if (' DEPO ' in nombre):
                        marca = 'DEPO'
                except:
                    marca = ''
                try:
                    modelo = car.split('<')[0].split('>')[1].replace("\\t", '').split('\\n')[3].strip()
                except:
                    modelo = ''
                anio = car.split('<')[0].split('>')[1].replace("\\t", '').split('\\n')[5].strip()
                if (anio != 'ALL YEARS'):
                    anioOrigin2 = '#' + anio
                    anioOrigin = anioOrigin2.replace('#20', '').replace('#19', '').replace('-20', '-').replace('-19', '-')
                    anioList = []
                    if ('-' in anio):
                        # expand a range like "2010-2014" into the individual years
                        anioInicio = int(anio.split('-')[0])
                        anioFin = int(anio.split('-')[1])
                        while (anioInicio <= anioFin):
                            anioList.append(str(anioInicio))
                            anioInicio = anioInicio + 1
                        anio = ' '.join(anioList) + ' '
                        if (len(anioList) < 5):
                            nombre = nombre.replace(anioOrigin, anio)
                        else:
                            nombre = nombre.replace(anioOrigin, anioOrigin2.replace('#', '').replace('-', ' a '))
                try:
                    notas = resultado.split('<b>Aplicaciones:</b>')[1].split('</div>')[0].replace("\\t", '').replace("\\n", '').replace('<br/>', ' - ')
                    notas = BeautifulSoup(notas, 'html.parser').text
                    # collapse runs of spaces left over from the HTML
                    while ('  ' in notas):
                        notas = notas.replace('  ', ' ')
                    if (notas.startswith(' - ')):
                        notas = notas.replace(" - ", "", 1)
                    if (notas.endswith(' - ')):
                        notas = rreplace(notas, " - ", "", 1)
                except:
                    notas = ''
                # expand the two-letter make abbreviations in the product name
                nombre = (nombre
                          .replace(' FD ', ' FORD ').replace(' CV ', ' CHEVROLET ')
                          .replace(' TY ', ' TOYOTA ').replace(' AD ', ' AUDI ')
                          .replace(' BK ', ' BUICK ').replace(' MC ', ' MERCEDES BENZ ')
                          .replace(' ST ', ' SEAT ').replace(' VW ', ' VOLKSWAGEN ')
                          .replace(' KI ', ' KIA ').replace(' NS ', ' NISSAN ')
                          .replace(' HD ', ' HONDA ').replace(' SN ', ' SATURN ')
                          .replace(' JP ', ' JEEP ').replace(' AC ', ' ACURA ')
                          .replace(' DG ', ' DODGE ').replace(' PT ', ' PONTIAC ')
                          .replace(' BW ', ' BMW ').replace(' CR ', ' CHRYSLER ')
                          .replace(' MT ', ' MITSUBISHI ').replace(' PG ', ' PEUGEOT ')
                          .replace(' UNIV ', ' UNIVERSAL '))
                nombre = nombre.replace(' JGO ', ' JUEGO ').replace(' CD ', ' CADILLAC ')
                resultados.append('"' + codigo + '","' + nombre + '","' + marca + '","' + marca_auto + '","'
                                  + categoria + '","' + costo + '","' + modelo + '","' + fotos + '","'
                                  + anio + '","' + notas + '"')
    except Exception as e:
        print('FALLO ---- > ' + URLLL)
        resultados.append('"' + URLLL + '"')
    return
def getRound(soup, id, gameId, round):
    div = soup.find('div', id=id)
    if div:
        categories = div.find_all('td', class_="category_name")
        # f = open("soup.txt","w")
        # f.write(soup.prettify())
        # f.close()
        if len(categories) == 6:
            categories = list(map(getText, categories))
            clueDivs = div.find_all('td', class_='clue')
        else:
            categories = soup.find_all('td', class_="category_name")
            clueDivs = soup.find_all('td', class_='clue')
            if round == 'Single':
                categories = categories[:6]
                clueDivs = clueDivs[:6]
            elif len(categories) >= 12:
                categories = categories[6:12]
                clueDivs = clueDivs[6:12]
            else:
                categories = []
                clueDivs = []
            categories = list(map(getText, categories))

        if len(categories) > 0:
            clues = [[0 for x in range(6)] for y in range(5)]
            answers = [[0 for x in range(6)] for y in range(5)]
            extract = re.compile('correct_response">(.*)</em>')
            row = 0
            col = 0
            numClues = 0
            for square in clueDivs:
                text = square.find('td', class_='clue_text')
                if text:
                    clues[row][col] = text.text
                    numClues += 1
                    answerDiv = square.find('div')
                    if answerDiv:
                        answer = extract.search(str(answerDiv))
                        pretty = BeautifulSoup(answer.group(1), 'html.parser').text
                        if pretty.startswith('<i>'):
                            pretty = pretty[3:]
                        if pretty.endswith('</i>'):
                            pretty = pretty[:-4]
                        answers[row][col] = pretty
                col += 1
                if col == 6:
                    col = 0
                    row += 1

            for col in range(6):
                sql = "INSERT INTO Categories (GameId, RoundCode, Name) VALUES (%s, %s, %s)"
                val = (gameId, round, categories[col])
                mycursor.execute(sql, val)
                mydb.commit()
                categoryId = mycursor.lastrowid

                sql = "Insert Into Clues (Categoryid, PointVal, Clue, Answer) Values (%s, %s, %s, %s)"
                val = []
                for row in range(5):
                    val.append((categoryId, row * 200 + 200, clues[row][col], answers[row][col]))
                mycursor.executemany(sql, val)
                mydb.commit()

            print('\t', round, ': ', numClues)
        else:
            print('\t', round, ': no clues')
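# getRound() assumes module-level `mydb`/`mycursor` handles plus a getText()
# helper. A minimal setup sketch using mysql.connector; the connection
# parameters and database name are placeholders, and the getText() stub is a
# guess at the original helper.
import mysql.connector

mydb = mysql.connector.connect(
    host='localhost',
    user='jeopardy',
    password='...',        # placeholder credentials
    database='jeopardy',
)
mycursor = mydb.cursor()

def getText(cell):
    # assumed helper: plain text of a category-name cell
    return cell.text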