def getTournaments():

    import requests
    import re
    from BeautifulSoup import BeautifulSoup

    # https://smash.gg/tournaments?per_page=30&filter=%7B%22name%22%3A%22%22%2C%22past%22%3Atrue%2C%22upcoming%22%3Afalse%7D&page=1

    maxPage = 2  # placeholder; see the pagination sketch after this function

    urlBase = "https://smash.gg/tournaments?per_page=30&filter=%7B%22name%22%3A%22%22%2C%22past%22%3Atrue%2C%22upcoming%22%3Afalse%7D&page="

    for i in range(1, maxPage + 1):  # pages are 1-indexed

        url = urlBase + str(i)

        # Do request, tolerating truncated responses
        print url
        try:
            data = requests.get(url).text
        except requests.exceptions.ChunkedEncodingError:
            print "Error fetching page: " + url
            continue
        print data

        # Keep a copy of the raw HTML for debugging
        with open('requesttest.txt', 'w') as f:
            f.write(data)

        soup = BeautifulSoup(data)
        divs = soup.findAll("div", {"class": "TournamentCardContainer"})
        print len(divs)
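
One way to replace the hard-coded maxPage above: fetch page 1 and take the largest page number that appears in the pager links. A minimal sketch, assuming the pager anchors carry a "page=<n>" query parameter (that selector is an assumption, not verified against smash.gg's actual markup):

import re
import requests
from BeautifulSoup import BeautifulSoup

def getMaxPage(urlBase):
    # Read page 1 and scan every link for a "page=<n>" parameter.
    soup = BeautifulSoup(requests.get(urlBase + "1").text)
    pages = [1]
    for a in soup.findAll("a", href=True):
        match = re.search(r"[?&]page=(\d+)", a["href"])
        if match:
            pages.append(int(match.group(1)))
    return max(pages)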
Example #2
import scraperwiki
from BeautifulSoup import BeautifulSoup

def scrape_page(url):
    html = scraperwiki.scrape(url)
    soup = BeautifulSoup(html)
    # print soup.prettify()
    # link_table = soup.find("div", {"class": "alphabet_list clearfix"})
    profiles = soup.findAll("div", {"class": "profileFriendsText"})  # the sections where the friends are listed
    for profile in profiles:
        link = profile.find("a")  # the anchor inside each friend entry
        if link and link.has_key('href'):
            next_url = link['href']
            print next_url
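
A hedged sketch of how these printed URLs could drive a bounded crawl; it assumes scrape_page is modified to return the collected hrefs rather than print them (the crawl function and its names are illustrative, not part of the original):

def crawl(start_url, limit=50):
    # Breadth-first walk over friend pages, capped at `limit` distinct profiles.
    seen = set()
    queue = [start_url]
    while queue and len(seen) < limit:
        url = queue.pop(0)
        if url in seen:
            continue
        seen.add(url)
        for next_url in scrape_page(url):  # assumes scrape_page returns hrefs
            if next_url not in seen:
                queue.append(next_url)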
Example #4
import re
from BeautifulSoup import BeautifulSoup
from anonBrowser import anonBrowser  # mechanize-based wrapper class defined alongside this script

def printLinks(url):
    ab = anonBrowser()
    ab.anonymize()
    page = ab.open(url)
    html = page.read()
    try:
        print '[+] Printing links from regex.'
        link_finder = re.compile('href="(.*?)"')
        links = link_finder.findall(html)
        for link in links:
            print link
    except:
        pass
    try:
        print '\n[+] Printing links from BeautifulSoup.'
        soup = BeautifulSoup(html)
        links = soup.findAll(name='a')
        for link in links:
            if link.has_key('href'):
                print link['href']
    except:
        pass
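
printLinks depends on the anonBrowser wrapper; the same two-pass comparison (naive regex versus the parser) works with plain urllib2 if anonymization is not needed. A minimal standalone sketch (printLinksPlain is an illustrative name, not part of the original):

import re
import urllib2
from BeautifulSoup import BeautifulSoup

def printLinksPlain(url):
    html = urllib2.urlopen(url).read()
    # Pass 1: naive regex over the raw HTML.
    for link in re.findall('href="(.*?)"', html):
        print link
    # Pass 2: let the parser find anchor tags.
    soup = BeautifulSoup(html)
    for link in soup.findAll('a', href=True):
        print link['href']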
import scraperwiki                          # ScraperWiki helper library
from BeautifulSoup import BeautifulSoup     # HTML parser for the fetched pages

print "Top 10 countries by currency"

Page = scraperwiki.scrape('http://www.xe.com/')
Source = BeautifulSoup(Page)
scraperwiki.metadata.save('columns', ['country name', 'currency name', 'worldrank', 'highest denomination till date', 'year of currency establishment', 'trading', 'mobile currency site'])
MainTable = Source.find("table")            # first table on the page, assumed to hold the currency rows
RowDetails = MainTable.findAll("tr")
print "****Scraping started*****"
for row in RowDetails:
    Dicrecord = {}  # dictionary to store one currency's details
    Columns = row.findAll("td")
    if len(Columns) >= 9:  # skip header and short rows before indexing
        Dicrecord['country name'] = Columns[0].text
        Dicrecord['currency name'] = Columns[1].text
        Dicrecord['highest denomination till date'] = Columns[2].text
        Dicrecord['year of currency establishment'] = Columns[3].text
        Dicrecord['trading'] = Columns[5].text
        Dicrecord['mobile currency site'] = Columns[8].text
        scraperwiki.datastore.save(["top 10 currency"], Dicrecord)
        print Dicrecord
print "****Scraping completed*****"