Python get_file Beispiele, hockey_scraper.utils.shared.get_file Python Beispiele

Beispiel #1

0

Datei anzeigen

def get_shifts(game_id):
    """
    Fetch the home and away html shift reports for a game.
    Ex: http://www.nhl.com/scores/htmlreports/20162017/TV020971.HTM

    :param game_id: the game; it is converted to a string and its first
                    four characters are used as the season

    :return: tuple of (home, away) - whatever shared.get_file gives back for each page
    """
    game_id = str(game_id)
    season, game_num = game_id[:4], game_id[4:]

    # The report directory is the season year followed by the next year (e.g. 20162017)
    report_dir = "http://www.nhl.com/scores/htmlreports/{}{}".format(
        season, int(season) + 1
    )
    urls = {
        "home": "{}/TH{}.HTM".format(report_dir, game_num),
        "away": "{}/TV{}.HTM".format(report_dir, game_num),
    }

    # A single request dict serves both fetches; only the url and type change
    page_info = {
        "url": urls["home"],
        "name": game_id,
        "type": "html_shifts_home",
        "season": season,
    }
    home = shared.get_file(page_info)

    # Re-point the same dict at the away page before fetching it
    page_info.update({"type": "html_shifts_away", "url": urls["away"]})
    away = shared.get_file(page_info)

    return home, away

Beispiel #2

0

Datei anzeigen

def get_pbp(game_id):
    """
    Given a game_id it returns the parsed json play-by-play feed
    Ex: http://statsapi.web.nhl.com/api/v1/game/2016020475/feed/live

    :param game_id: the game - a string or an int (it is converted to a string)

    :return: parsed json of game, or an empty dict if the game couldn't be obtained
    """
    # Coerce up front so an integer id doesn't fail on the season slice below
    game_id = str(game_id)

    page_info = {
        "url": 'http://statsapi.web.nhl.com/api/v1/game/{}/feed/live'.format(game_id),
        "name": game_id,
        "type": "json_pbp",
        "season": game_id[:4],
    }
    response = shared.get_file(page_info)

    # Nothing came back: report it and hand back an empty dict
    if not response:
        shared.print_error(
            "Json pbp for game {} is either not there or can't be obtained".
            format(game_id))
        return {}

    return json.loads(response)

Beispiel #3

0

Datei anzeigen

Datei: espn_pbp.py Projekt: dbendet22/Hockey-Scraper

def get_espn_game(date, home_team, away_team, game_id=None):
    """
    Gets the ESPN pbp feed
    Ex: http://www.espn.com/nhl/gamecast/data/masterFeed?lang=en&isAll=true&gameId=400885300

    :param date: date of the game
    :param home_team: home team
    :param away_team: away team
    :param game_id: Game id if we already have it - for live scraping. None if not there

    :return: raw xml

    :raises Exception: if shared.get_file returns None for the feed
    """
    # Look the id up from the date and (upper-cased) team names if it wasn't provided
    if not game_id:
        game_id = get_espn_game_id(date, home_team.upper(), away_team.upper())

    file_info = {
        "url": 'http://www.espn.com/nhl/gamecast/data/masterFeed?lang=en&isAll=true&gameId={}'.format(game_id),
        "name": game_id,
        "type": "espn_pbp",
        "season": shared.get_season(date),
    }
    response = shared.get_file(file_info)

    # Same exception type as before, but now it says which game failed
    if response is None:
        raise Exception(
            "ESPN pbp for game {} is either not there or can't be obtained".format(game_id)
        )

    return response

Beispiel #4

0

Datei anzeigen

Datei: html_shifts.py Projekt: battyone/Hockey-Scraper

def get_shifts(game_id):
    """
    Fetch the html shift report of each venue (home first, then away) for a game.
    Ex: http://www.nhl.com/scores/htmlreports/20162017/TV020971.HTM

    :param game_id: the game; it is converted to a string and its first
                    four characters are used as the season

    :return: tuple of (home, away) - whatever shared.get_file gives back for each page
    """
    game_id = str(game_id)
    season, game_num = game_id[:4], game_id[4:]

    def fetch(venue, venue_tag):
        # The report directory is the season year followed by the next year
        page_info = {
            "url": 'http://www.nhl.com/scores/htmlreports/{}{}/T{}{}.HTM'.format(
                season, int(season) + 1, venue_tag, game_num),
            "name": game_id,
            "type": "html_shifts_{}".format(venue),
            "season": season,
        }
        return shared.get_file(page_info)

    # Tuple elements are evaluated left to right, so home is fetched before away
    return (fetch("home", "H"), fetch("away", "V"))

Beispiel #5

0

Datei anzeigen

Datei: json_shifts.py Projekt: barberflex/Hockey-Scraper

def get_shifts(game_id):
    """
    Fetch and parse the json shift chart for a game.
    Ex: http://www.nhl.com/stats/rest/shiftcharts?cayenneExp=gameId=2010020001

    :param game_id: the game

    :return: parsed json, or an empty dict when nothing could be retrieved
    """
    game_name = str(game_id)
    url = "http://www.nhl.com/stats/rest/shiftcharts?cayenneExp=gameId={}".format(game_id)

    response = shared.get_file({
        "url": url,
        "name": game_name,
        "type": "json_shifts",
        "season": game_name[:4],
    })

    # A falsy response means the page couldn't be obtained
    return json.loads(response) if response else {}

Beispiel #6

0

Datei anzeigen

Datei: espn_pbp.py Projekt: riffnshred/Hockey-Scraper

def get_espn_date(date):
    """
    Get the page that contains all the games for that day

    :param date: YYYY-MM-DD

    :return: response

    :raises Exception: if shared.get_file returns nothing for the scoreboard page
    """
    page_info = {
        # The scoreboard url takes the date without dashes (YYYYMMDD)
        "url": 'http://www.espn.com/nhl/scoreboard/_/date/{}'.format(date.replace('-', '')),
        "name": date,
        "type": "espn_scoreboard",
        "season": shared.get_season(date),
    }
    response = shared.get_file(page_info)

    # Same exception type as before, but now it says which date failed
    if not response:
        raise Exception(
            "ESPN scoreboard for {} is either not there or can't be obtained".format(date)
        )

    return response

Beispiel #7

0

Datei anzeigen

Datei: playing_roster.py Projekt: barberflex/Hockey-Scraper

def get_roster(game_id):
    """
    Fetch the html roster report for a game.
    Ex: http://www.nhl.com/scores/htmlreports/20162017/RO020475.HTM

    :param game_id: the game; it is converted to a string and its first
                    four characters are used as the season

    :return: whatever shared.get_file gives back for the roster page
    """
    game_id = str(game_id)
    season = game_id[:4]

    # The report directory is the season year followed by the next year (e.g. 20162017)
    url = "http://www.nhl.com/scores/htmlreports/{}{}/RO{}.HTM".format(
        season, int(season) + 1, game_id[4:])

    return shared.get_file({
        "url": url,
        "name": game_id,
        "type": "html_roster",
        "season": season,
    })

Beispiel #8

0

Datei anzeigen

def test_get_file(file_info):
    """ Check get_file both before and after a save directory is registered """
    start_dir = os.getcwd()

    def check_fetch():
        # The result must be a non-empty str and the working directory must be untouched
        contents = shared.get_file(file_info)
        assert type(contents) is str
        assert len(contents) > 0
        assert start_dir == os.getcwd()

    # First pass: nothing has been registered through add_dir in this test yet
    check_fetch()

    # Second pass: register the directory holding this test file, then fetch again
    this_dir = os.path.dirname(os.path.realpath(__file__))
    shared.add_dir(this_dir)
    check_fetch()

    # Cleanup: remove the "docs" folder from this file's directory and go back
    os.chdir(this_dir)
    shutil.rmtree("docs")
    os.chdir(start_dir)

Beispiel #9

0

Datei anzeigen

Datei: html_schedule.py Projekt: barberflex/Hockey-Scraper

def get_schedule(url, name):
    """
    Fetch a schedule page through shared.get_file

    :param url: url for page
    :param name: Name for saved file (converted to a string)

    :return: whatever shared.get_file gives back for the page
    """
    saved_name = str(name)

    # Both the type and the season are fixed to the nwhl values for this page
    return shared.get_file({
        "url": url,
        "name": saved_name,
        "type": "html_schedule_nwhl",
        "season": "nwhl",
    })

Beispiel #10

0

Datei anzeigen

Datei: json_schedule.py Projekt: GeneJuggler/Hockey-Scraper

def get_schedule(date_from, date_to):
    """
    Fetch and parse the json schedule covering a date range
    Ex: https://statsapi.web.nhl.com/api/v1/schedule?startDate=2010-10-03&endDate=2011-06-20

    :param date_from: scrape from this date
    :param date_to: scrape until this date

    :return: parsed json of schedule of date range
    """
    url = 'https://statsapi.web.nhl.com/api/v1/schedule?startDate={a}&endDate={b}'.format(a=date_from, b=date_to)
    # The saved name is the two dates joined by an underscore
    name = "_".join((date_from, date_to))
    season = shared.get_season(date_from)

    raw = shared.get_file({
        "url": url,
        "name": name,
        "type": "json_schedule",
        "season": season,
    })

    return json.loads(raw)

Beispiel #11

0

Datei anzeigen

def get_pbp(game_id):
    """
    Fetch and parse the play by play json for a game
    (e.g. https://www.nwhl.zone/game/get_play_by_plays?id=18507472)

    :param game_id: Given Game id (e.g. 18507472)

    :return: parsed json, or an empty dict when nothing could be retrieved
    """
    url = 'https://www.nwhl.zone/game/get_play_by_plays?id={}'.format(game_id)
    response = shared.get_file({
        "url": url,
        "name": str(game_id),
        "type": "nwhl_json_pbp",
        "season": "nwhl",
    })

    # Parse whatever came back; a falsy response falls through to the report below
    if response:
        return json.loads(response)

    print("Json pbp for game {} is either not there or can't be obtained".format(game_id))
    return {}