Example #1
def regressionTests():
    """Run sitescraper against regression tests to ensure model generation not broken by changes"""
    for module in testdata.__all__:
        ss = sitescraper(debug=False)
        website = getattr(testdata, module)
        data = [('testdata/%s/%s' % (module, url), output)
                for (url, output) in website.data]
        for filename, output in data[:-1]:
            ss.add(filename, output)
        print '\n' + str(module)
        print ss.model()

        filename = data[-1][0]
        our_scrape = ss.scrape(filename)
        expected_scrape = sitescraper(model=website.model).scrape(filename)
        if all(our_scrape) and our_scrape == expected_scrape:
            # test passed
            print 'Passed'
        else:
            # the induced XPath model did not reproduce the expected scrape, so the test failed
            print 'Expected:'
            print expected_scrape
            print 'Scraped:'
            print our_scrape
            print 'Expected model:'
            printModel(HtmlXpathSet(website.model))
            print 'Scraped model:'
            printModel(ss.model())
            if not all(our_scrape): print 'Failed to scrape all'
            if our_scrape != expected_scrape: print 'Scrapes do not match'
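The regression test above exercises sitescraper's two-step workflow: train a model by pairing pages with their expected output via add(), inspect the induced XPaths via model(), then apply the model to a held-out page via scrape(). A minimal sketch of that workflow, using the same API as above but hypothetical file names and outputs:

from sitescraper import sitescraper

ss = sitescraper(debug=False)
# train: pair a local file (or URL) with the output we expect to extract from it
ss.add('testdata/example/train1.html', ['expected title', 'expected price'])
ss.add('testdata/example/train2.html', ['another title', 'another price'])
# the XPath model sitescraper induced from the training pairs
print ss.model()
# apply the induced model to a page that was held out of training
print ss.scrape('testdata/example/test.html')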
Example #2
#!/usr/bin/python
# -*- coding: utf-8 -*-

import sys
from sitescraper import sitescraper
from pylab import *
import numpy

ss = sitescraper()

#url = 'http://basketball.fantasysports.yahoo.com/nba/86110/matchup?week=1&module=matchup'
url = 'http://basketball.fantasysports.yahoo.com/nba/86110/?matchup_week=2&module=matchupsmack&matchupsmacktab=m'

#data = [".451 ",".793","16","271","151","109","35","15"," 56",'<td  class="stat"><strong>47</strong></td>',"7"]
data=[["FG%","FT%","3PTM","PTS","REB","AST","ST","BLK","TO","Score"], [".473 ",".716","44","441","210","105","34","20"," 88","6"]]

ss.add(url, data)  # train the scraper on this matchup page
cur_week = ss.scrape('http://basketball.fantasysports.yahoo.com/nba/86110/matchup?week=2')

print cur_week
sys.exit(0)  # early exit left in while debugging; the loop below never runs until this is removed

#print(ss.scrape('http://basketball.fantasysports.yahoo.com/nba/87421/matchup?week=3&mid1=18&mid2=1'))
weeks = []
for week_num in range(2, 5):
#for week_num in range(2, 21):
    print week_num
    cur_week = ss.scrape('http://basketball.fantasysports.yahoo.com/nba/86110/matchup?week=' + str(week_num) + '&mid1=18&mid2=1')
    weeks.append(cur_week[1])  # keep only the stat values for this week
#    print(cur_week[1])
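The pylab and numpy imports suggest the script goes on to plot the collected weekly stats, but the excerpt ends here. A plausible continuation, sketched purely under the assumption that each entry of weeks holds the stat strings in the same order as the header row used for training:

# hypothetical continuation: plot one stat category across the scraped weeks
labels = ["FG%", "FT%", "3PTM", "PTS", "REB", "AST", "ST", "BLK", "TO", "Score"]
pts = [float(week[labels.index("PTS")]) for week in weeks]
plot(range(2, 2 + len(pts)), pts, 'o-')  # plot/xlabel/ylabel/show come from the pylab star import
xlabel('week')
ylabel('PTS')
show()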