Esempio n. 1
0
def mail():
    """Scrape headline entries from the Daily Mail front page.

    Fetches the front page, collects the ``h2`` elements whose class is
    ``linkro-darkred``, trims that collection with ``tenifier`` (presumably
    to the first ten entries -- confirm against the helper), and for every
    remaining heading follows its first anchor to the article page to pull
    out the ``div`` marked ``itemprop="articleBody"``.

    Returns:
        The list that ``appendifier`` was asked to fill, one call per
        heading, with the anchor, article link, site URL and extracted body.
    """
    site_url = "http://www.dailymail.co.uk"
    collected = []

    front_page = soupifier(site_url)
    heading_query = {
        "soup": front_page,
        "tag": 'h2',
        "attribute": 'class',
        "names": 'linkro-darkred',
        "find": 0
    }
    headings = tenifier(searchifier(heading_query))

    for heading in headings:
        # The "find": 0 lookup yields an indexable result; only its first
        # anchor is used as the headline element.
        anchor_query = {
            "soup": heading,
            "tag": 'a',
            "names": None,
            "find": 0
        }
        anchor = searchifier(anchor_query)[0]

        article_link = linkifier(site_url, anchor)
        article_page = soupifier(article_link)
        body_query = {
            "soup": article_page,
            "tag": 'div',
            "attribute": 'itemprop',
            "names": 'articleBody',
            "find": 1
        }
        article_text = bodifier(searchifier(body_query))
        appendifier(collected, anchor, article_link, site_url, article_text)

    return collected
Esempio n. 2
0
def guardian():
    """Scrape headline entries from the Guardian UK front page.

    Looks up the ``section`` with id ``headlines`` on the front page, then
    the overlay anchors inside it, trims them with ``tenifier`` (presumably
    to ten entries -- confirm against the helper) and visits each linked
    article to extract its body ``div``.

    Returns:
        The list that ``appendifier`` was asked to fill, one call per
        headline anchor.
    """
    site_url = "https://www.theguardian.com/uk"
    # Article links are resolved against the site root, not the /uk page.
    link_base = 'https://www.theguardian.com/'
    collected = []

    front_page = soupifier(site_url)
    section_query = {
        "soup": front_page,
        "tag": 'section',
        "attribute": 'id',
        "names": 'headlines',
        "find": 0
    }
    headline_section = searchifier(section_query)

    anchor_query = {
        "soup": headline_section,
        "tag": 'a',
        "attribute": 'class',
        "names": 'u-faux-block-link__overlay js-headline-text',
        "find": 0
    }
    anchors = tenifier(searchifier(anchor_query))

    for anchor in anchors:
        article_link = linkifier(link_base, anchor)
        article_page = soupifier(article_link)
        body_query = {
            "soup": article_page,
            "tag": 'div',
            "attribute": 'class',
            "names": 'content__article-body from-content-api js-article__body',
            "find": 0
        }
        article_text = bodifier(searchifier(body_query))
        appendifier(collected, anchor, article_link, site_url, article_text)

    return collected
Esempio n. 3
0
def bbc():
    """Scrape headline entries from the BBC News front page.

    Collects the anchors with class ``gs-c-promo-heading``, trims them with
    ``tenifier`` (presumably to ten entries -- confirm against the helper)
    and visits each linked page, searching for a body ``div`` under any of
    several candidate class names.

    Returns:
        The list that ``appendifier`` was asked to fill, one call per
        headline anchor.
    """
    site_url = "http://www.bbc.co.uk/news"
    # Article links are resolved against the site root, not the /news page.
    link_base = 'http://www.bbc.co.uk/'
    # Candidate class names handed to searchifier for the article body.
    body_classes = [
        'story-body__inner',
        'vxp-media__body',
        'story-body sp-story-body gel-body-copy',
    ]
    collected = []

    front_page = soupifier(site_url)
    anchor_query = {
        "soup": front_page,
        "tag": 'a',
        "attribute": 'class',
        "names": 'gs-c-promo-heading',
        "find": 0
    }
    anchors = tenifier(searchifier(anchor_query))

    for anchor in anchors:
        article_link = linkifier(link_base, anchor)
        article_page = soupifier(article_link)
        body_query = {
            "soup": article_page,
            "tag": 'div',
            "attribute": 'class',
            "names": body_classes,
            "find": 0
        }
        article_text = bodifier(searchifier(body_query))
        appendifier(collected, anchor, article_link, site_url, article_text)

    return collected
Esempio n. 4
0
def _independent_article_body(link):
    """Fetch the page at *link* and return the bodified ``body-content`` div."""
    article_page = soupifier(link)
    body_node = searchifier({"soup": article_page, "tag": 'div', "attribute": 'class', "names": 'body-content', "find": 1})
    return bodifier(body_node)


def independent():
    """Scrape headline entries from the Independent front page.

    Two areas of the front page are processed in order:

    1. The ``splash-row`` inside ``section-content``: every ``content`` div
       that contains an ``h2`` is treated as a top story.
    2. The ``eight-articles-dmpu position-left`` block: for every ``content``
       div, all anchors except the first are treated as stories, and the
       nested ``headline`` div is used as the headline element.

    For each story the linked article is fetched and its ``body-content``
    div is passed through ``bodifier``.

    Returns:
        The list that ``appendifier`` was asked to fill, splash stories
        first, then the stories from the eight-articles block.
    """
    independent_headlines_array = []
    url = "https://www.independent.co.uk"
    soup = soupifier(url)

    # --- Splash row: the top stories are the content divs carrying an <h2>.
    section_content = searchifier({"soup": soup, "tag": 'section', "attribute": 'class', "names": 'section-content', "find": 1})
    splash_row = searchifier({"soup": section_content, "tag": 'div', "attribute": 'class', "names": 'splash-row', "find": 1})
    splash_items = searchifier({"soup": splash_row, "tag": 'div', "attribute": 'class', "names": 'content', "find": 0})
    for splash_item in splash_items:
        top_two = searchifier({"soup": splash_item, "tag": 'h2', "names": None, "find": 1})
        if not top_two:
            continue
        anchor = searchifier({"soup": splash_item, "tag": 'a', "names": None, "find": 1})
        link = linkifier(url, anchor)
        body = _independent_article_body(link)
        # The whole content div (not the <h2>) is recorded as the headline,
        # exactly as before.
        appendifier(independent_headlines_array, splash_item, link, url, body)

    # --- Eight-articles block.
    eight_articles_dmpu = searchifier({"soup": soup, "tag": 'div', "attribute": 'class', "names": 'eight-articles-dmpu position-left', "find": 1})
    top_eight = searchifier({"soup": eight_articles_dmpu, "tag": 'div', "attribute": 'class', "names": 'content', "find": 0})
    for content_div in top_eight:
        # The first anchor of each content div is skipped; the reason is not
        # visible here (NOTE(review): possibly a category/image link -- confirm).
        story_anchors = searchifier({"soup": content_div, "tag": 'a', "names": None, "find": 0})[1:]
        for story_anchor in story_anchors:
            link = linkifier(url, story_anchor)
            headline = searchifier({"soup": story_anchor, "tag": 'div', "attribute": 'class', "names": 'headline', "find": 1})
            body = _independent_article_body(link)
            appendifier(independent_headlines_array, headline, link, url, body)

    return independent_headlines_array
Esempio n. 5
0
def sun():
    """Scrape headline entries from the Sun front page.

    Takes at most ten ``a.text-anchor-wrap`` anchors from the front page.
    For each one the ``p.teaser__subdeck`` element is used as the headline
    and the anchor's ``href`` as the article link. The article body is the
    list of stripped paragraph texts from the first ``div.article__content``
    on the linked page, or an empty list when no such div exists.

    Returns:
        The list that ``appendifier`` was asked to fill, one call per anchor.
    """
    site_url = "https://www.thesun.co.uk"
    collected = []

    front_page = soupifier(site_url)
    teasers = front_page.find_all('a', {'class': 'text-anchor-wrap'}, limit=10)
    for teaser in teasers:
        subdeck = teaser.find('p', {'class': 'teaser__subdeck'})
        article_link = teaser['href']

        article_page = soupifier(article_link)
        containers = article_page.find_all('div', {'class': 'article__content'})
        # Only the first matching container contributes paragraphs.
        paragraphs = containers[0].find_all('p') if containers else []
        article_text = [paragraph.text.strip() for paragraph in paragraphs]

        appendifier(collected, subdeck, article_link, site_url, article_text)

    return collected
Esempio n. 6
0
def telegraph():
    """Scrape headline entries from the Telegraph front page.

    Collects the ``h3`` headings matching either headline class, trims them
    with ``tenifier`` (presumably to ten entries -- confirm against the
    helper) and resolves a link for each. The link is first looked up via
    an anchor / headline-text span inside the heading; when ``linkifier``
    reports ``False`` the heading's parent element is tried instead. The
    linked page is then searched for an ``article`` element or a
    ``js-article-inner`` div to obtain the body.

    Returns:
        The list that ``appendifier`` was asked to fill, one call per
        heading; the ``h3`` heading itself is recorded as the headline.
    """
    site_url = "https://www.telegraph.co.uk"
    collected = []

    front_page = soupifier(site_url)
    heading_query = {
        "soup": front_page,
        "tag": 'h3',
        "attribute": 'class',
        "names": ['list-of-entities__item-body-headline', 'list-headline'],
        "find": 0
    }
    headings = tenifier(searchifier(heading_query))

    for heading in headings:
        # searchifier is given a list of alternative queries here;
        # presumably it tries them in order -- confirm against the helper.
        link_source_queries = [
            {
                "soup": heading,
                "tag": 'a',
                "names": None,
                "find": 1
            },
            {
                "soup": heading,
                "tag": 'span',
                "attribute": 'class',
                "names": 'list-of-entities__item-headline-text',
                "find": 1
            },
        ]
        article_link = linkifier(site_url, searchifier(link_source_queries))
        if article_link is False:
            # Fall back to the element wrapping the heading.
            article_link = linkifier(site_url, heading.parent)

        article_page = soupifier(article_link)
        body_queries = [
            {
                "soup": article_page,
                "tag": 'article',
                "names": None,
                "find": 0
            },
            {
                "soup": article_page,
                "tag": 'div',
                "attribute": 'class',
                "names": 'js-article-inner',
                "find": 0
            },
        ]
        article_text = bodifier(searchifier(body_queries))
        appendifier(collected, heading, article_link, site_url, article_text)

    return collected