def regressionTests():
    """Check that model generation still reproduces each stored test site.

    For every module named in ``testdata.__all__`` a fresh scraper is trained
    on all but the last ``(url, output)`` pair of that module's ``data``.  The
    last page is then scraped twice: once with the freshly trained scraper and
    once with a scraper built from the module's stored ``model``.  The module
    passes when every scraped value is truthy and both results are equal;
    otherwise both scrapes and both models are printed, followed by the
    reason(s) the check failed.
    """
    for module in testdata.__all__:
        scraper = sitescraper(debug=False)
        site = getattr(testdata, module)
        # Map each page name to its location under testdata/<module>/.
        pages = [('testdata/%s/%s' % (module, url), output)
                 for (url, output) in site.data]

        # Train on everything except the final page, which is held back.
        for page_path, page_output in pages[:-1]:
            scraper.add(page_path, page_output)
        print('\n' + str(module))
        print(scraper.model())

        held_out = pages[-1][0]
        our_scrape = scraper.scrape(held_out)
        expected_scrape = sitescraper(model=site.model).scrape(held_out)

        if all(our_scrape) and our_scrape == expected_scrape:
            # test passed
            print('Passed')
            continue

        # expected xpath did not match so test failed: show both scrapes,
        # then both models, then say which of the two checks tripped.
        print('Expected:')
        print(expected_scrape)
        print('Scraped:')
        print(our_scrape)
        print('Expected:')
        printModel(HtmlXpathSet(site.model))
        print('Scraped:')
        printModel(scraper.model())
        if not all(our_scrape):
            print('Failed to scrape all')
        if our_scrape != expected_scrape:
            print('Scrapes do not match')
def regressionTests():
    """Run the stored regression sites through sitescraper and report results.

    Each module listed in ``testdata.__all__`` supplies ``data`` (pairs of
    page name and expected output) and a reference ``model``.  A new scraper
    learns from every pair but the last, then scrapes the last page; the
    result is compared with what a scraper built from the reference model
    returns for the same page.  "Passed" is printed when all scraped values
    are truthy and the results are equal; otherwise the scrapes, the models
    and the failing condition(s) are printed.
    """
    for module in testdata.__all__:
        ss = sitescraper(debug=False)
        website = getattr(testdata, module)

        # Pair every page's path under testdata/<module>/ with its output.
        data = []
        for url, output in website.data:
            data.append(("testdata/%s/%s" % (module, url), output))

        # All pages but the last are training examples.
        for filename, output in data[:-1]:
            ss.add(filename, output)
        print("\n" + str(module))
        print(ss.model())

        # The last page is the one actually checked.
        filename = data[-1][0]
        our_scrape = ss.scrape(filename)
        reference = sitescraper(model=website.model)
        expected_scrape = reference.scrape(filename)

        if not (all(our_scrape) and our_scrape == expected_scrape):
            # expected xpath did not match so test failed
            print("Expected:")
            print(expected_scrape)
            print("Scraped:")
            print(our_scrape)
            print("Expected:")
            printModel(HtmlXpathSet(website.model))
            print("Scraped:")
            printModel(ss.model())
            if not all(our_scrape):
                print("Failed to scrape all")
            if our_scrape != expected_scrape:
                print("Scrapes do not match")
        else:
            # test passed
            print("Passed")
#!/usr/bin/python # -*- coding: utf-8 -*- import sys from sitescraper import sitescraper from pylab import * import numpy ss = sitescraper() #url='http://basketball.fantasysports.yahoo.com/nba/86110/matchup?week=1&module=matchup' url = 'http://basketball.fantasysports.yahoo.com/nba/86110/?matchup_week=2&module=matchupsmack&matchupsmacktab=m' #data = [".451 ",".793","16","271","151","109","35","15"," 56",'<td class="stat"><strong>47</strong></td>',"7"] data = [[ "FG%", "FT%", "3PTM", "PTS", "REB", "AST", "ST", "BLK", "TO", "Score" ], [".473 ", ".716", "44", "441", "210", "105", "34", "20", " 88", "6"]] ss.add(url, data) cur_week = ss.scrape( 'http://basketball.fantasysports.yahoo.com/nba/86110/matchup?week=2') print cur_week sys.exit(0) #print(ss.scrape('http://basketball.fantasysports.yahoo.com/nba/87421/matchup?week=3&mid1=18&mid2=1')) weeks = [] for week_num in range(2, 5): #for week_num in range(2,21) : print week_num cur_week = ss.scrape(
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Train a sitescraper model on one Yahoo fantasy-basketball matchup page and
# print what it extracts from the week-2 matchup page.
import sys
from sitescraper import sitescraper
from pylab import *
import numpy

ss = sitescraper()

# Training page.  Earlier attempt, kept for reference:
#url='http://basketball.fantasysports.yahoo.com/nba/86110/matchup?week=1&module=matchup'
url = 'http://basketball.fantasysports.yahoo.com/nba/86110/?matchup_week=2&module=matchupsmack&matchupsmacktab=m'

# Example output for the training page: the stat headings and one row of
# values.  Earlier single-row attempt, kept for reference:
#data = [".451 ",".793","16","271","151","109","35","15"," 56",'<td class="stat"><strong>47</strong></td>',"7"]
stat_headings = ["FG%", "FT%", "3PTM", "PTS", "REB", "AST", "ST", "BLK", "TO", "Score"]
stat_values = [".473 ", ".716", "44", "441", "210", "105", "34", "20", " 88", "6"]
data = [stat_headings, stat_values]
ss.add(url, data)

cur_week = ss.scrape('http://basketball.fantasysports.yahoo.com/nba/86110/matchup?week=2')
print(cur_week)

# The script stops here; the per-week loop below is currently never reached.
sys.exit(0)

#print(ss.scrape('http://basketball.fantasysports.yahoo.com/nba/87421/matchup?week=3&mid1=18&mid2=1'))
weeks = []
# Collect the second scraped row for each week.  Wider range, kept for
# reference:
#for week_num in range(2,21) :
for week_num in range(2, 5):
    print(week_num)
    week_url = ('http://basketball.fantasysports.yahoo.com/nba/86110/matchup?week='
                + str(week_num) + '&mid1=18&mid2=1')
    cur_week = ss.scrape(week_url)
    weeks.append(cur_week[1])
#    print(cur_week[1])