Example #1
    def __init__(self,
                 headers=None,
                 cookies=None,
                 cache_name=None,
                 expire_hours=4,
                 as_string=False):
        '''
        Initialize scraper object

        Args:
            headers: dict of headers
            cookies: cookies object
            cache_name: str
            expire_hours: int hours to keep in cache
            as_string: bool, False -> returns parsed JSON, True -> returns string

        Returns:
            scraper object
        '''
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        BasketballScraper.__init__(self,
                                   headers=headers,
                                   cookies=cookies,
                                   cache_name=cache_name,
                                   expire_hours=expire_hours,
                                   as_string=as_string)
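
A minimal usage sketch for the constructor above; SomeScraper stands in for whatever subclass defines this __init__ (the class name is not shown in the snippet):

# Hypothetical subclass name -- the real class is not shown above.
scraper = SomeScraper(cache_name='nba-cache',  # name used for the request cache
                      expire_hours=4,          # cached responses expire after 4 hours
                      as_string=False)         # False -> parsed JSON, True -> raw string
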
Example #2
    def __init__(self, headers=None, cookies=None, cache_name=None):

        # see http://stackoverflow.com/questions/8134444
        logging.getLogger(__name__).addHandler(logging.NullHandler())

        if not headers:
            self.headers = {
                'Referer':
                'http://www.fantasylabs.com/nfl/player-models/',
                'User-Agent':
                'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0'
            }
        else:
            self.headers = headers

        self.cookies = cookies
        self.cache_name = cache_name

        BasketballScraper.__init__(self,
                                   headers=self.headers,
                                   cookies=self.cookies,
                                   cache_name=self.cache_name)

        self.maxindex = 400
        base_url = 'http://games.espn.go.com/ffl/tools/projections?'
        idx = [0, 40, 80, 120, 160, 200, 240, 280, 320, 360]
        self.projection_urls = [base_url + 'startIndex=' + str(x) for x in idx]
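
For reference, the hard-coded idx list above is just range(0, 400, 40); a self-contained sketch of the URL expansion (str() is required because the indices are ints):

base_url = 'http://games.espn.go.com/ffl/tools/projections?'
urls = [base_url + 'startIndex=' + str(x) for x in range(0, 400, 40)]
# urls[0] ends with 'startIndex=0'; urls[-1] ends with 'startIndex=360'
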
Example #3
    def __init__(self, headers=None, cookies=None, cache_name=None):
        '''

        Args:
            headers: dict of HTTP headers
            cookies: cookies object
            cache_name: str name of cache
        '''
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        BasketballScraper.__init__(self, headers=headers, cookies=cookies, cache_name=cache_name)
Example #4
    def __init__(self, headers=None, cookies=None, cache_name=None):
        '''

        Args:
            headers: dict of HTTP headers
            cookies: cookies object
            cache_name: str name of cache
        '''
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        # see http://stackoverflow.com/questions/8134444
        BasketballScraper.__init__(self, headers, cookies, cache_name)
Example #5
    def __init__(self,
                 headers=None,
                 cookies=None,
                 cache_name=None,
                 expire_hours=4,
                 as_string=False):
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        BasketballScraper.__init__(self,
                                   headers=headers,
                                   cookies=cookies,
                                   cache_name=cache_name,
                                   expire_hours=expire_hours,
                                   as_string=as_string)
Example #6
    def __init__(self, headers=None, cookies=None, cache_name=None, expire_hours=12, as_string=False):
        '''
        Scraper for the unofficial stats.nba.com API

        Args:
            headers: dictionary of HTTP headers
            cookies: cookie object, such as browsercookie.firefox()
            cache_name: str 'nbacomscraper'
            expire_hours: how long to cache requests
            as_string: return as raw string rather than json parsed into python data structure
        '''
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        BasketballScraper.__init__(self, headers=headers, cookies=cookies,
                                   cache_name=cache_name, expire_hours=expire_hours, as_string=as_string)
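
A hedged usage sketch based on the docstring above; NBAComScraper is a placeholder name for the subclass, and browsercookie is the third-party package the docstring suggests for the cookies argument:

import browsercookie  # third-party package suggested by the docstring

# Placeholder subclass name -- the class defining this __init__ is not shown.
scraper = NBAComScraper(cookies=browsercookie.firefox(),
                        cache_name='nbacomscraper',
                        expire_hours=12)
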
Example #7
    def __init__(self,
                 headers=None,
                 cookies=None,
                 cache_name=None,
                 expire_hours=4,
                 as_string=False):
        '''
        Initialize scraper object

        Args:
            headers: dict of headers
            cookies: cookies object
            cache_name: str
            expire_hours: int hours to keep in cache
            as_string: bool, False -> returns parsed JSON, True -> returns string

        Returns:
            scraper object
        '''
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        if not headers:
            self.headers = {
                'Referer':
                'http://www.fantasylabs.com/nfl/player-models/',
                'User-Agent':
                'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0'
            }
        else:
            self.headers = headers
        BasketballScraper.__init__(self,
                                   headers=self.headers,
                                   cookies=cookies,
                                   cache_name=cache_name,
                                   expire_hours=expire_hours,
                                   as_string=as_string)
        self.model_urls = {
            'default':
            'http://www.fantasylabs.com/api/playermodel/2/{0}/?modelId=100605',
            'bales':
            'http://www.fantasylabs.com/api/playermodel/2/{0}/?modelId=193714',
            'phan':
            'http://www.fantasylabs.com/api/playermodel/2/{0}/?modelId=661266',
            'tournament':
            'http://www.fantasylabs.com/api/playermodel/2/{0}/?modelId=193722',
            'cash':
            'http://www.fantasylabs.com/api/playermodel/2/{0}/?modelId=884277'
        }
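
Each model URL carries a {0} placeholder that is filled per request; a standalone sketch (the game_date value and its format are hypothetical, since the expected format is not shown in this snippet):

model_urls = {
    'default': 'http://www.fantasylabs.com/api/playermodel/2/{0}/?modelId=100605',
}
game_date = '1_2_2017'  # hypothetical; the API's expected date format is not documented here
url = model_urls['default'].format(game_date)
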
Example #8
    def __init__(self, headers=None, cookies=None, cache_name=None):
        '''

        Args:
            headers: dict of HTTP headers
            cookies: cookies object
            cache_name: str name of cache
        '''
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        BasketballScraper.__init__(self,
                                   headers=headers,
                                   cookies=cookies,
                                   cache_name=cache_name)

        self.maxindex = 400
        base_url = 'http://games.espn.go.com/fba/tools/projections?startIndex={}'
        idx = [0, 40, 80, 120, 160, 200, 240, 280, 320, 360]
        self.projection_urls = [base_url.format(x) for x in idx]
Example #9
    def __init__(self, headers=None, cookies=None, cache_name=None):
        logging.getLogger(__name__).addHandler(logging.NullHandler())

        if not headers:
            self.headers = {
                'Referer':
                'http://www.fantasylabs.com/nfl/player-models/',
                'User-Agent':
                'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0'
            }
        else:
            self.headers = headers

        self.cookies = cookies
        self.cache_name = cache_name
        BasketballScraper.__init__(self,
                                   headers=self.headers,
                                   cookies=self.cookies,
                                   cache_name=self.cache_name)
Example #10
    def __init__(self, headers=None, cookies=None, cache_name=None, expire_hours=4, as_string=False):
        '''
        Initialize scraper object

        Args:
            headers: dict of headers
            cookies: cookies object
            cache_name: str
            expire_hours: int hours to keep in cache
            as_string: bool, False -> returns parsed JSON, True -> returns string

        Returns:
            scraper object
        '''
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        BasketballScraper.__init__(self, headers=headers, cookies=cookies, cache_name=cache_name,
                                   expire_hours=expire_hours, as_string=as_string)
        self.model_urls = {
            'default': 'https://www.fantasylabs.com/api/playermodel/2/{}/?modelId=193718&projOnly=true',
        }
Example #11
    def __init__(self,
                 headers=None,
                 cookies=None,
                 cache_name=None,
                 expire_hours=12,
                 as_string=False):
        '''
        Scraper for the Wayback Machine availability API

        Args:
            headers: dictionary of HTTP headers
            cookies: cookie object, such as browsercookie.firefox()
            cache_name: str name of cache
            expire_hours: int hours to keep requests in cache
            as_string: return raw string rather than JSON parsed into a python data structure
        '''
        self.wburl = 'http://archive.org/wayback/available?url={}&timestamp={}'
        logging.getLogger(__name__).addHandler(logging.NullHandler())
        BasketballScraper.__init__(self,
                                   headers=headers,
                                   cookies=cookies,
                                   cache_name=cache_name,
                                   expire_hours=expire_hours,
                                   as_string=as_string)
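
For context, the wburl template above matches the Wayback Machine availability API, which takes a target URL and a timestamp of up to 14 digits (YYYYMMDDhhmmss); a minimal formatting sketch:

wburl = 'http://archive.org/wayback/available?url={}&timestamp={}'
url = wburl.format('nba.com', '20170101')
# -> 'http://archive.org/wayback/available?url=nba.com&timestamp=20170101'
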
Example #12
        '''
        ps = []
        teams = [
            'ATL', 'BOS', 'BKN', 'CHA', 'CHI', 'CLE', 'DAL', 'DEN', 'DET',
            'GSW', 'HOU', 'IND', 'LAC', 'LAL', 'MEM', 'MIA', 'MIL', 'MIN',
            'NOP', 'NYK', 'OKC', 'ORL', 'PHI', 'PHO', 'POR', 'SAC', 'SAS',
            'TOR', 'UTA', 'WAS'
        ]
        soup = BeautifulSoup(content, 'lxml')
        for team, t in zip(teams,
                           soup.find_all('table', {'class': 'basketball'})):
            for a in t.find_all('a'):
                try:
                    if 'Summary' in a['href']:
                        ps.append((team, a['href']))
                except KeyError:
                    logging.error('no href for {}'.format(a))
        return ps


if __name__ == "__main__":
    from nba.parsers.rotoworld import RotoworldNBAParser
    from nba.scrapers.scraper import BasketballScraper

    s = BasketballScraper(cache_name='rwtest')
    url = 'http://www.rotoworld.com/teams/depth-charts/nba.aspx'
    content = s.get(url)
    p = RotoworldNBAParser()
    print(p.depth_charts(content, '20170102'))
