def __init__(self, headers=None, cookies=None, cache_name=None, expire_hours=4, as_string=False):
    '''
    Initialize scraper object

    Args:
        headers: dict of headers
        cookies: cookies object
        cache_name: str
        expire_hours: int hours to keep in cache
        as_string: bool, False -> returns parsed json, True -> returns string

    Returns:
        scraper object
    '''
    logging.getLogger(__name__).addHandler(logging.NullHandler())
    BasketballScraper.__init__(self, headers=headers, cookies=cookies, cache_name=cache_name,
                               expire_hours=expire_hours, as_string=as_string)
def __init__(self, headers=None, cookies=None, cache_name=None):
    '''
    Initialize scraper object

    Args:
        headers: dict of headers
        cookies: cookies object
        cache_name: str
    '''
    # see http://stackoverflow.com/questions/8134444
    logging.getLogger(__name__).addHandler(logging.NullHandler())
    if not headers:
        self.headers = {
            'Referer': 'http://www.fantasylabs.com/nfl/player-models/',
            'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0'
        }
    else:
        self.headers = headers
    self.cookies = cookies
    self.cache_name = cache_name
    BasketballScraper.__init__(self, headers=self.headers, cookies=self.cookies, cache_name=self.cache_name)
    self.maxindex = 400
    base_url = 'http://games.espn.go.com/ffl/tools/projections?'
    idx = [0, 40, 80, 120, 160, 200, 240, 280, 320, 360]
    # idx holds ints; convert to str before concatenating into the query string
    self.projection_urls = [base_url + 'startIndex=' + str(x) for x in idx]
def __init__(self, headers=None, cookies=None, cache_name=None):
    '''
    Args:
        headers: dict of headers
        cookies: cookies object
        cache_name: str
    '''
    logging.getLogger(__name__).addHandler(logging.NullHandler())
    BasketballScraper.__init__(self, headers=headers, cookies=cookies, cache_name=cache_name)
def __init__(self, headers=None, cookies=None, cache_name=None):
    '''
    Args:
        headers: dict of headers
        cookies: cookies object
        cache_name: str
    '''
    logging.getLogger(__name__).addHandler(logging.NullHandler())
    # see http://stackoverflow.com/questions/8134444
    BasketballScraper.__init__(self, headers=headers, cookies=cookies, cache_name=cache_name)
def __init__(self, headers=None, cookies=None, cache_name=None, expire_hours=4, as_string=False):
    '''
    Initialize scraper object

    Args:
        headers: dict of headers
        cookies: cookies object
        cache_name: str
        expire_hours: int hours to keep in cache
        as_string: bool, False -> returns parsed json, True -> returns string
    '''
    logging.getLogger(__name__).addHandler(logging.NullHandler())
    BasketballScraper.__init__(self, headers=headers, cookies=cookies, cache_name=cache_name,
                               expire_hours=expire_hours, as_string=as_string)
def __init__(self, headers=None, cookies=None, cache_name=None, expire_hours=12, as_string=False):
    '''
    Scraper for stats.nba.com (informal) API

    Args:
        headers: dictionary of HTTP headers
        cookies: cookie object, such as browsercookie.firefox()
        cache_name: str 'nbacomscraper'
        expire_hours: how long to cache requests
        as_string: return as raw string rather than json parsed into python data structure
    '''
    logging.getLogger(__name__).addHandler(logging.NullHandler())
    BasketballScraper.__init__(self, headers=headers, cookies=cookies, cache_name=cache_name,
                               expire_hours=expire_hours, as_string=as_string)
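# A minimal usage sketch, assuming this __init__ belongs to a stats.nba.com scraper
# class (called NBAComScraper here for illustration) and that BasketballScraper
# exposes a cached get_json(url, payload=...) method; both names are assumptions,
# not confirmed by this file. With as_string=False the response comes back as a
# parsed python data structure rather than a raw string.
#
#   s = NBAComScraper(cache_name='nbacomscraper', expire_hours=12)
#   content = s.get_json('http://stats.nba.com/stats/scoreboardV2',
#                        payload={'GameDate': '2017-01-02', 'LeagueID': '00', 'DayOffset': '0'})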
def __init__(self, headers=None, cookies=None, cache_name=None, expire_hours=4, as_string=False):
    '''
    Initialize scraper object

    Args:
        headers: dict of headers
        cookies: cookies object
        cache_name: str
        expire_hours: int hours to keep in cache
        as_string: bool, False -> returns parsed json, True -> returns string

    Returns:
        scraper object
    '''
    logging.getLogger(__name__).addHandler(logging.NullHandler())
    if not headers:
        self.headers = {
            'Referer': 'http://www.fantasylabs.com/nfl/player-models/',
            'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0'
        }
    else:
        self.headers = headers
    BasketballScraper.__init__(self, headers=self.headers, cookies=cookies, cache_name=cache_name,
                               expire_hours=expire_hours, as_string=as_string)
    self.model_urls = {
        'default': 'http://www.fantasylabs.com/api/playermodel/2/{0}/?modelId=100605',
        'bales': 'http://www.fantasylabs.com/api/playermodel/2/{0}/?modelId=193714',
        'phan': 'http://www.fantasylabs.com/api/playermodel/2/{0}/?modelId=661266',
        'tournament': 'http://www.fantasylabs.com/api/playermodel/2/{0}/?modelId=193722',
        'cash': 'http://www.fantasylabs.com/api/playermodel/2/{0}/?modelId=884277'
    }
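# A minimal sketch of how model_urls looks intended to be used: each value is a
# format string whose {0} slot takes a date segment. The class name, the date
# format, and the get_json method are illustrative assumptions, not confirmed
# by this file.
#
#   s = FantasyLabsNBAScraper(cache_name='fl-nba')
#   url = s.model_urls['bales'].format('1_2_2017')
#   model = s.get_json(url)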
def __init__(self, headers=None, cookies=None, cache_name=None):
    '''
    Args:
        headers: dict of headers
        cookies: cookies object
        cache_name: str
    '''
    logging.getLogger(__name__).addHandler(logging.NullHandler())
    BasketballScraper.__init__(self, headers=headers, cookies=cookies, cache_name=cache_name)
    self.maxindex = 400
    base_url = 'http://games.espn.go.com/fba/tools/projections?startIndex={}'
    idx = [0, 40, 80, 120, 160, 200, 240, 280, 320, 360]
    self.projection_urls = [base_url.format(x) for x in idx]
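# Sketch of consuming projection_urls: ESPN paginates projections 40 players per
# page via startIndex, so the ten URLs cover indexes 0-360 (self.maxindex = 400).
# The class name is an illustrative assumption; get() is the cached HTML fetch
# used in the __main__ block at the bottom of this file.
#
#   s = ESPNFantasyBasketballScraper(cache_name='espn-fba')
#   pages = [s.get(url) for url in s.projection_urls]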
def __init__(self, headers=None, cookies=None, cache_name=None):
    '''
    Initialize scraper object

    Args:
        headers: dict of headers
        cookies: cookies object
        cache_name: str
    '''
    logging.getLogger(__name__).addHandler(logging.NullHandler())
    if not headers:
        self.headers = {
            'Referer': 'http://www.fantasylabs.com/nfl/player-models/',
            'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0'
        }
    else:
        self.headers = headers
    self.cookies = cookies
    self.cache_name = cache_name
    BasketballScraper.__init__(self, headers=self.headers, cookies=self.cookies, cache_name=self.cache_name)
def __init__(self, headers=None, cookies=None, cache_name=None, expire_hours=4, as_string=False):
    '''
    Initialize scraper object

    Args:
        headers: dict of headers
        cookies: cookies object
        cache_name: str
        expire_hours: int hours to keep in cache
        as_string: bool, False -> returns parsed json, True -> returns string

    Returns:
        scraper object
    '''
    logging.getLogger(__name__).addHandler(logging.NullHandler())
    BasketballScraper.__init__(self, headers=headers, cookies=cookies, cache_name=cache_name,
                               expire_hours=expire_hours, as_string=as_string)
    self.model_urls = {
        'default': 'https://www.fantasylabs.com/api/playermodel/2/{}/?modelId=193718&projOnly=true',
    }
def __init__(self, headers=None, cookies=None, cache_name=None, expire_hours=12, as_string=False):
    '''
    Scraper for waybackmachine API

    Args:
        headers: dictionary of HTTP headers
        cookies: cookie object, such as browsercookie.firefox()
        cache_name: str 'waybackscraper'
        expire_hours: how long to cache requests
        as_string: return as raw string rather than json parsed into python data structure
    '''
    self.wburl = 'http://archive.org/wayback/available?url={}&timestamp={}'
    logging.getLogger(__name__).addHandler(logging.NullHandler())
    BasketballScraper.__init__(self, headers=headers, cookies=cookies, cache_name=cache_name,
                               expire_hours=expire_hours, as_string=as_string)
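# Sketch of querying the Wayback Machine availability API via wburl: format in a
# target URL and a YYYYMMDD timestamp, then walk the documented
# archived_snapshots -> closest structure in the parsed response. The class name
# and the get_json method are illustrative assumptions.
#
#   s = WaybackScraper(cache_name='waybackscraper')
#   resp = s.get_json(s.wburl.format('http://stats.nba.com/', '20170102'))
#   snapshot_url = resp.get('archived_snapshots', {}).get('closest', {}).get('url')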
import logging

from bs4 import BeautifulSoup


def summary_links(content):
    '''
    Parses an NBA depth-charts page for per-team Summary links.
    NOTE: the original def line was lost; this name and signature are a best guess.

    Args:
        content: HTML string

    Returns:
        list of (team code, href) tuples
    '''
    ps = []
    teams = [
        'ATL', 'BOS', 'BKN', 'CHA', 'CHI', 'CLE', 'DAL', 'DEN', 'DET', 'GSW',
        'HOU', 'IND', 'LAC', 'LAL', 'MEM', 'MIA', 'MIL', 'MIN', 'NOP', 'NYK',
        'OKC', 'ORL', 'PHI', 'PHO', 'POR', 'SAC', 'SAS', 'TOR', 'UTA', 'WAS'
    ]
    soup = BeautifulSoup(content, 'lxml')
    # the page lists one table of class 'basketball' per team, in the same
    # alphabetical order as the team codes above
    for team, t in zip(teams, soup.find_all('table', {'class': 'basketball'})):
        for a in t.find_all('a'):
            try:
                if 'Summary' in a['href']:
                    ps.append((team, a['href']))
            except KeyError:
                logging.error('no href for {}'.format(a))
    return ps


if __name__ == "__main__":
    from nba.parsers.rotoworld import RotoworldNBAParser
    from nba.scrapers.scraper import BasketballScraper

    s = BasketballScraper(cache_name='rwtest')
    url = 'http://www.rotoworld.com/teams/depth-charts/nba.aspx'
    content = s.get(url)
    p = RotoworldNBAParser()
    print(p.depth_charts(content, '20170102'))