def roster_helper(sport, team, parser_func):
    """
    Delegate function which helps query the roster for a provided team of
    a provided sport.

    :param sport: The name of the sport
    :type sport: str
    :param team: The name of the team
    :type team: str
    :param parser_func: The parsing function to be applied to the scraped
        roster table
    :type parser_func: callable
    :returns: A JSON response
    :rtype: flask.Response
    """
    team_id = get_team_id(sport, team)

    if team_id is None:
        abort(404)

    rv = fetch_cached_data(args=sport + str(team_id))
    if rv is not None:
        return rv

    soup = help_fetch_soup(
        url=ROSTER_URL.replace(SPORT_TOKEN, sport),
        request_params={
            PARAM_TEAM: team_id,
            PARAM_RESOURCE_TYPE: ARG_RESOURCE_TYPE
        }
    )

    out = prepare_json_output(help_parse_soup(soup, parser_func))
    del soup

    # Cache for 24 hours
    cache_data(data=out, args=sport + str(team_id), timeout=60 * 60 * 24)

    return out
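# A minimal usage sketch for roster_helper. The route rule and the
# "help_parse_mlb_roster_soup" parser name are hypothetical, for
# illustration only; the app's real routes and parsers live elsewhere:
#
#   @app.route("/mlb/roster/<team>")
#   def mlb_roster(team):
#       return roster_helper("mlb", team, help_parse_mlb_roster_soup)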
def rankings_helper(url, parser_func):
    """
    Returns all rankings for all matches.

    :param url: URL of the ranking
    :type url: str
    :param parser_func: The parsing function to be applied to the scraped
        rankings table
    :type parser_func: callable
    :returns: A formatted dictionary ready for display
    :rtype: dict
    """
    rv = fetch_cached_data()
    if rv is not None:
        return rv

    tour = help_get_list_from_dropdown(url, attr_name="tour")
    stack = {}

    for the_round in tour:
        soup = help_fetch_soup(
            url=url,
            request_params={PARAM_TOUR: the_round}
        )
        stack[the_round] = help_parse_soup(soup, parser_func)

    out = prepare_json_output(stack)
    del stack

    # Cache for 12 hours
    cache_data(data=out, timeout=60 * 60 * 12)

    return out
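# Sketch of the shape rankings_helper builds before caching, assuming
# help_get_list_from_dropdown returns round identifiers such as "1" and
# "2" (illustrative values only):
#
#   {
#       "1": [...],  # rows parsed from round 1 by parser_func
#       "2": [...],
#       ...
#   }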
def stats_helper(sport, team, parser_func):
    """
    Delegate function which helps query the statistics for a provided
    team of a provided sport.

    :param sport: The name of the sport
    :type sport: str
    :param team: The team identifier sent to STATS
    :type team: str
    :param parser_func: The parsing function to be applied to the scraped
        statistics table
    :type parser_func: callable
    :returns: A formatted dictionary ready for display
    :rtype: dict
    """
    rv = fetch_cached_data()
    if rv is not None:
        return rv

    soup = help_fetch_soup(
        url=STATS_URL.replace(SPORT_TOKEN, sport),
        request_params={
            PARAM_TEAM: team,  # debugging for now
            PARAM_RESOURCE_TYPE: ARG_RESOURCE_TYPE
        },
        class_attrs="sortable shsTable shsBorderTable"
    )

    out = prepare_json_output(help_parse_soup(soup, parser_func))
    del soup

    # Cache for 24 hours
    cache_data(data=out, timeout=60 * 60 * 24)

    return out
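# Hypothetical caller for stats_helper, for illustration only (the route
# rule and the "help_parse_nhl_stats_soup" parser name are assumptions):
#
#   @app.route("/nhl/stats/<team>")
#   def nhl_stats(team):
#       return stats_helper("nhl", team, help_parse_nhl_stats_soup)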
def standings_helper(url, league, skip_conference_row=False):
    """
    Fetches and parses standings data. Supports layouts with multiple
    tables or with a single table. At the time of writing, the MLB
    standings are split into 2 tables, while the NHL's fit in 1.

    :param url: URL of the standings page
    :type url: str
    :param league: The league of the desired scoreboard
    :type league: str
    :param skip_conference_row: Whether to skip conference title rows
    :type skip_conference_row: bool
    :returns: A formatted dictionary ready for display
    :rtype: dict
    """
    rv = fetch_cached_data()
    if rv is not None:
        return rv

    soup = help_fetch_soup(url)

    column_list = []
    row_list = []
    stack = {}

    # Iterate over each conference/league
    for table in soup("table"):
        conference = None
        division = None

        # Iterate over each division
        for row in table("tr"):
            if row.get("class") is None:
                continue
            elif "shsTableTtlRow" in row.get("class"):
                if skip_conference_row:
                    continue

                # Single table layout support.
                # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                # If the string conference evaluates to true, then we've
                # encountered a new conference. Save the data that exists
                # in the lists column_list and row_list.
                if conference:
                    # Does the list column_list have any data?
                    if column_list:
                        # Is this layout split into divisions?
                        if division:
                            row_list.append({division: column_list})
                        else:
                            row_list = column_list
                        column_list = []

                    stack[conference] = row_list
                    row_list = []

                conference = row.extract().text.strip().lower().encode("utf-8")
                conference = slugify(text=unicode(conference, "utf-8"),
                                     delimiter=u'_')
            elif "shsTableSubttlRow" in row.get("class"):
                # If the string division evaluates to true, then we've
                # encountered a new division. Save the data that exists
                # in the list column_list.
                if division and column_list:
                    row_list.append({division: column_list})
                    column_list = []

                division = row.extract().text.strip().lower().encode("utf-8")
                division = division.replace("division", '')
                division = slugify(text=unicode(division, "utf-8"),
                                   delimiter=u'_')
            elif any(css_class.startswith("shsRow")
                     for css_class in row.get("class")):
                cells = row("td")
                value_dict = None

                if "mlb" == league:
                    value_dict = help_parse_mlb_soup(cells)
                elif "nhl" == league:
                    value_dict = help_parse_nhl_soup(cells)
                elif "nfl" == league:
                    value_dict = help_parse_nfl_soup(cells)
                elif "nba" == league:
                    value_dict = help_parse_nba_soup(cells)
                elif "mls" == league:
                    value_dict = help_parse_mls_soup(cells)
                elif "epl" == league:
                    value_dict = help_parse_epl_soup(cells)

                if value_dict is not None:
                    column_list.append(value_dict)
        #end for row in table("tr")

        # Get the last division in the table
        if division:
            row_list.append({division: column_list})
        # If there is no division, then attach the columns directly to
        # the conference
        else:
            row_list = column_list
        column_list = []

        # We must evaluate conference because EPL and MLS do not have
        # conferences
        if conference:
            stack[conference] = row_list
        # If a conference is nonexistent, then check for the division's
        # existence. If a division exists, then treat it as if it were a
        # conference (i.e. place the division at the highest level).
        # Currently, this only occurs with MLS.
        elif division:
            if row_list[0][division]:
                stack[division] = row_list[0][division]
        # Otherwise, both conference and division are nonexistent.
        # Convert stack into a list so the teams stay in table order.
        # Currently, this only occurs with EPL.
        else:
            # stack is a dictionary; change it to a list
            del stack
            stack = row_list

        row_list = []
    #end for table in soup("table")

    out = prepare_json_output(stack)
    del row_list, stack

    # Cache for 2 hours
    cache_data(data=out, timeout=60 * 60 * 2)

    return out
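# The three output shapes standings_helper can produce, reconstructed from
# the branches above (keys are illustrative slugs, not guaranteed values):
#
#   Conferences with divisions (e.g. MLB, NHL, NFL, NBA):
#       {"american_league": [{"east": [...]}, {"central": [...]}, ...], ...}
#
#   Divisions promoted to the top level (e.g. MLS):
#       {"eastern": [...], "western": [...]}
#
#   Neither conferences nor divisions (e.g. EPL), where stack becomes a
#   list so the table order is preserved:
#       [{...}, {...}, ...]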
def posts():
    """
    Queries posts from WordPress.com's public API implementation for
    NESN.com.

    This does a few things. First, it simplifies WordPress.com's complex
    JSON response from their public API. It gathers the Facebook like and
    comment counts along with the Tweet count for each URL. It also
    interfaces with WordPress.com's Photon service to crop images for
    display on mobile devices.

    :returns: JSON
    :rtype: flask.Response
    """
    rv = fetch_cached_data()
    if rv is not None:
        return jsonify(rv)

    args = {
        PARAM_WORDPRESS_POST_CATEGORY: request.args.get(PARAM_NESN_POST_CATEGORY),
        PARAM_WORDPRESS_POST_COUNT: request.args.get(PARAM_NESN_POST_COUNT)
    }
    r = requests.get(url=POSTS_URL, params=args)
    posts = r.json()

    # Were any posts found?
    if 0 == posts["found"]:
        return jsonify(message="No posts found.", status=200)

    urls = []

    # Build a list of URLs to pass to Facebook's Graph tool
    for post in posts["posts"]:
        urls.append(post["URL"])

    #-- Facebook Request
    urls_str = ','.join('\'' + url + '\'' for url in urls)
    url = ARG_FQL.replace(FQL_TOKEN, urls_str)
    args = {PARAM_FACEBOOK_QUERY: url}
    r = requests.get(url=FACEBOOK_GRAPH_URL, params=args)
    fb_response = r.json()

    vals = {}
    stack = []
    # stripper = HTMLStripper()

    for post in posts["posts"]:
        categories = []

        vals["id"] = post["ID"]
        vals["author"] = post["author"]["name"]
        vals["title"] = post["title"]
        vals["published"] = int(timestamp_from_string(post["date"]))
        vals["modified"] = int(timestamp_from_string(post["modified"]))
        vals["image"] = post["featured_image"]
        # stripper.feed(post["content"])
        # vals["content"] = stripper.get_text()
        vals["content"] = post["content"]
        vals["url"] = post["URL"]

        #-- Twitter Request
        args = {PARAM_TWITTER_URL: vals["url"]}
        r = requests.get(url=TWITTER_URLS_URL, params=args)
        vals["tweets"] = int(r.json()["count"])

        # Match up Facebook data with the URL
        for link_stat in fb_response["data"]:
            if link_stat["url"] == vals["url"]:
                vals["facebook"] = {"likes": link_stat["like_count"],
                                    "comments": link_stat["comment_count"]}

        for category in post["categories"]:
            categories.append(category)
        vals["categories"] = categories

        stack.append(vals.copy())
        vals = {}

    del args, categories, fb_response, r, vals

    out = prepare_json_output(stack)
    del stack

    # Automatically cached for 15 minutes
    cache_data(out)

    return jsonify(out)
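# Assumption about the Facebook request above: ARG_FQL presumably wraps an
# FQL query over the (since retired) link_stat table, with FQL_TOKEN
# standing in for the quoted URL list, along the lines of:
#
#   SELECT url, like_count, comment_count FROM link_stat
#   WHERE url IN (<FQL_TOKEN>)
#
# which would explain the link_stat["url"], link_stat["like_count"], and
# link_stat["comment_count"] lookups in the loop above.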
def scores_helper(year=None, month=None, day=None, sport=None, league=None):
    """
    Generic helper function to scrape scoring data from STATS's
    JavaScript file.

    :param year: The year of the desired scoreboard
    :type year: int
    :param month: The month of the desired scoreboard
    :type month: int
    :param day: The day of the desired scoreboard
    :type day: int
    :param sport: The sport of the desired scoreboard
    :type sport: str
    :param league: The league of the desired scoreboard
    :type league: str
    :returns: A formatted dictionary ready for display
    :rtype: dict
    """
    try:
        date_string = stats_date_string(date(year, month, day))
    except (ValueError, TypeError):
        date_string = stats_date_string()

    rv = fetch_cached_data()
    if rv is not None:
        return rv

    args = {
        PARAM_SPORT: sport,
        PARAM_DATE: date_string,
        PARAM_LEAGUE: league
    }
    soup = help_fetch_soup(
        SCORES_URL,
        request_params=args,
        source_file_type="JavaScript",
        class_attrs="shsTable"
    )

    # If there are two or fewer rows in the document, then there are
    # probably no scores listed.
    if len(soup("tr")) <= 2:
        # del soup
        # Cache for a day to be safe
        out = {"message": "No games scheduled for "}

        if not month and not day and not year:
            out["message"] += "today"
        else:
            out["message"] += "{month}/{day}/{year}".format(month=month,
                                                            day=day,
                                                            year=year)

        cache_data(data=out, timeout=60 * 60 * 24)
        return out

    stack = {}
    vals = []
    section = ''
    team = None
    has_the_status_cell = False
    # logcat(str(soup))

    for row in soup("tr"):
        # Rows which have team names do not have a class attribute.
        # This test must be first.
        if row.get("class") is None:
            cells = row("td")
            has_the_status_cell = False

            # Does this row have a status cell?
            if any("shsMiniStatus" in (cell.get("class") or [])
                   for cell in cells):
                has_the_status_cell = True

            if len(cells) >= 2:
                # Alternate between "away" and "home"; the away team is
                # listed first.
                team = "home" if team == "away" else "away"

                # If the list of values is empty, then initialize it
                if not vals:
                    vals.append({"away": None, "home": None})
                # If the last item is complete, then append a new item
                # indicating a new game.
                elif vals[-1]["away"] and vals[-1]["home"]:
                    vals.append({"away": None, "home": None})

                # Add scoring information for the game.
                vals[-1][team] = {
                    "team": cells[0].find('a').extract().text.strip().encode("utf-8"),
                    "score": cells[1].extract().text.strip().encode("utf-8")
                }

                try:
                    # Try to convert the string to an int.
                    vals[-1][team]["score"] = int(vals[-1][team]["score"])
                except (ValueError, TypeError):
                    # If it fails, assign null
                    vals[-1][team]["score"] = None

            if has_the_status_cell:
                status = cells[2].find('a')
                # Arbitrary game information, such as "OT" for overtime
                extra = cells[2].find('br')
                time = cells[2].find(name="span",
                                     attrs={"class": "shsTimezone shsGMTZone"})

                # Set the status only if not null
                if status:
                    vals[-1]["status"] = status.extract().text.strip().encode("utf-8")

                    if 2 == len(vals[-1]["status"].split('-')):
                        # Save the string to the right of '-' in extra
                        if not extra:
                            extra = vals[-1]["status"].split('-')[1].strip()
                        vals[-1]["status"] = vals[-1]["status"].split('-')[0].strip()

                    vals[-1]["status"] = vals[-1]["status"].lower()

                if time:
                    vals[-1]["time"] = time.extract().text.strip().encode("utf-8")

                if extra:
                    # Sometimes, extra contains a NavigableString
                    try:
                        vals[-1]["extra"] = extra.extract().text.strip().encode("utf-8")
                    # While other times, it's just a str
                    except AttributeError:
                        vals[-1]["extra"] = extra
                    vals[-1]["extra"] = vals[-1]["extra"].lower()
        # Skip over the first row; it's the title
        elif "shsTableTtlRow" in row.get("class"):
            continue
        elif any(css_class in ("shsTableSubttlRow", "shsSubSectionRow",
                               "shsMiniRowSpacer")
                 for css_class in row.get("class")):
            cell = row("td")
            section = cell[0].extract().text.strip().encode("utf-8")

            # Are the scores separated into sections? If so, find the
            # separator
            if section:
                section = slugify(text=unicode(section, "utf-8"),
                                  delimiter=u'_')

                if vals:
                    stack[section] = vals
                    vals = []

                # return section
                stack[section] = None

    # Save the last value
    if section:
        stack[section] = vals
    else:
        stack = vals

    del vals

    out = prepare_json_output(stack)

    # Cache for 1 minute
    cache_data(data=out, timeout=60)

    return out
def teams_helper(sport=None):
    """
    Generic helper function to scrape the list of teams for the provided
    sport from STATS.
    """
    flat_list = query_string_arg_to_bool(PARAM_FLAT_LIST)

    rv = fetch_cached_data(args=PARAM_FLAT_LIST if flat_list else None)
    if rv is not None:
        return rv

    # STATS does not order NFL teams
    nfl_teams = [
        "Atlanta Falcons", "Buffalo Bills", "Chicago Bears",
        "Cincinnati Bengals", "Cleveland Browns", "Dallas Cowboys",
        "Denver Broncos", "Detroit Lions", "Green Bay Packers",
        "Tennessee Titans", "Indianapolis Colts", "Kansas City Chiefs",
        "Oakland Raiders", "St. Louis Rams", "Miami Dolphins",
        "Minnesota Vikings", "New England Patriots", "New Orleans Saints",
        "New York Giants", "New York Jets", "Philadelphia Eagles",
        "Arizona Cardinals", "Pittsburgh Steelers", "San Diego Chargers",
        "San Francisco 49ers", "Seattle Seahawks", "Tampa Bay Buccaneers",
        "Washington Redskins", "Carolina Panthers", "Jacksonville Jaguars",
        '', '', "Baltimore Ravens", "Houston Texans"
    ]

    soup = help_fetch_soup(url=TEAMS_URL.replace(URL_TOKEN, sport))

    stack = []
    redis_stack = []
    league_stack = []
    division_stack = []
    league = None
    division = None

    # Iterate over each conference
    for table in soup("table"):
        for row in table("tr"):
            if row.get("class") is None:
                continue

            cells = row("td")

            # Conference Row
            if "shsTableTtlRow" in row.get("class"):
                if flat_list:
                    continue

                if division_stack and division:
                    league_stack.append({division: division_stack})
                    division_stack = []

                if league_stack and league:
                    stack.append({league: league_stack})
                    league_stack = []

                league = format_division(row)
            # Division Row
            elif "shsTableSubttlRow" in row.get("class"):
                if flat_list:
                    continue

                if division_stack and division:
                    league_stack.append({division: division_stack})
                    division_stack = []

                division = format_division(row)
            # Team Row
            else:
                team = cells[0].extract().text.strip().encode("utf-8")

                # Save the team in a flat list for persistent storage
                redis_stack.append(team)

                if flat_list:
                    stack.append(team)
                else:
                    division_stack.append(team)

    # Flush the last division and conference
    if division_stack and division:
        league_stack.append({division: division_stack})
        division_stack = []

    if league_stack and league:
        stack.append({league: league_stack})
        league_stack = []

    out = prepare_json_output(stack)
    del soup, division_stack, league_stack, stack

    redis_key = app.config["REDIS_KEY_TEAMS"].replace(
        app.config["REDIS_KEY_TOKEN_SPORT"],
        "nfl" if "fb" == sport else sport)

    if not redis.exists(redis_key):
        if "fb" == sport:
            redis_stack = nfl_teams

        # Convert the object to a JSON string
        redis.set(name=redis_key,
                  value=dumps(prepare_json_output(redis_stack)))

    del redis_key, redis_stack

    cache_data(
        data=out,
        args=PARAM_FLAT_LIST if flat_list else None,
        timeout=60 * 60 * 24 * 300  # Cache for 300 days
    )

    return out
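# Sketch of the nested output when the flat_list query string argument is
# absent (names illustrative):
#
#   [{"american_league": [{"east": ["Baltimore Orioles", ...]}, ...]}, ...]
#
# With flat_list set, stack is simply ["Baltimore Orioles", ...]. The
# Redis copy is always the flat list; the two blank entries in nfl_teams
# above presumably keep STATS's team IDs aligned with list indices.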
def schedule_helper(sport, team, from_month=None, to_month=None,
                    parser_func=None):
    """
    Returns the schedule for the provided team of the provided sport.

    TODO: Support filtering operations passed via query string.

    :param sport: The name of the sport
    :type sport: str
    :param team: The name of the team
    :type team: str
    :param from_month: The month number from which the schedule begins
    :type from_month: int
    :param to_month: The month number at which the schedule terminates
    :type to_month: int
    :param parser_func: The parsing function to be applied to the scraped
        schedule table
    :type parser_func: callable
    :returns: A formatted dictionary ready for display
    :rtype: dict
    """
    team_id = get_team_id(sport, team)

    if team_id is None:
        abort(404)

    rv = fetch_cached_data(args=sport + str(team_id))
    if rv is not None:
        return rv

    url = SCHEDULE_URL.replace(SPORT_TOKEN, sport)
    stack = []

    # At this time, the NFL schedule is not listed by month.
    if from_month is None and to_month is None:
        args = {
            PARAM_TEAM: format_int_for_stats(team_id),
            PARAM_RESOURCE_TYPE: ARG_RESOURCE_TYPE
        }
        soup = help_fetch_soup(url, request_params=args)

        # Only use the first table
        stack = help_parse_soup(soup("table")[0], parser_func)
    # Iterate through schedules which have a separate URL for each month
    else:
        # To increase readability, we allow the caller to define
        # from_month and to_month in a familiar format. However, if the
        # values of from_month and to_month are 9 and 6 respectively,
        # then it becomes impossible to build an xrange. To correct this,
        # we ensure the value of to_month is always greater than the
        # value of from_month by increasing it by 12 and then taking the
        # result mod 12 later on down the line.
        to_month = to_month + 12 if to_month < from_month else to_month

        for month in xrange(from_month, to_month):
            # Build the argument list for STATS.
            args = {
                PARAM_TEAM: format_int_for_stats(team_id),
                PARAM_RESOURCE_TYPE: ARG_RESOURCE_TYPE,
                PARAM_MONTH: format_month_number_for_stats(month,
                                                           pad_with_zero=True)
            }

            # http://stackoverflow.com/questions/15871769/using-beautiful-soup-grabbing-stuff-between-li-and-li
            soup = help_fetch_soup(url, request_params=args)

            # Must use += to keep this a flat list
            stack += help_parse_soup(soup, parser_func,
                                     format_month_number_for_stats(month))
            # stack[month] = help_parse_soup(soup, parser_func, month)

    out = prepare_json_output(stack)
    cache_data(data=out, args=sport + str(team_id), timeout=CACHE_TIMEOUT)

    return out
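# Worked example of the month arithmetic above: a September-to-June season
# calls schedule_helper with from_month=9 and to_month=6. Since 6 < 9,
# to_month becomes 18, and xrange(9, 18) yields 9, 10, ..., 17. The
# assumption is that format_month_number_for_stats maps each value back
# into the 1..12 range, e.g. with something like:
#
#   ((month - 1) % 12) + 1    # 13 -> 1, 14 -> 2, ..., 17 -> 5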
def mlb():
    """
    Queries the recent MLB injuries from STATS.

    :returns: A JSON response
    :rtype: flask.Response
    """
    # Because this endpoint does not take any arguments, always cache
    # under the same key
    rv = fetch_cached_data(cache_key="mlb_injuries")
    if rv is not None:
        return jsonify(rv)

    r = requests.get("http://stats.nesn.com/mlb/stats.asp?file=recentinj")
    raw_string = re.sub(r"\s+", ' ', r.text)

    # http://stackoverflow.com/questions/15871769/using-beautiful-soup-grabbing-stuff-between-li-and-li
    soup = BeautifulSoup(raw_string,
                         from_encoding="UTF-8",
                         parse_only=SoupStrainer(
                             name="div",
                             attrs={"id": "shsMLBrecentinj"}))
    del r, raw_string

    # for e in soup.findAll('br'):
    #     e.extract()

    team = None
    vals = {}
    stack = []
    team_stack = []

    # Remove the title elements
    iter_soup = soup(["h2", "table"])
    iter_soup.pop(0)
    iter_soup.pop(0)
    # return str(iter_soup)

    for item in iter_soup:
        if item is None:
            continue
        # The team name
        elif "shsTableTitle" in (item.get("class") or []):
            team = item.extract().text.encode("utf-8").lower().replace(
                ' ', '_')
        # The important data
        else:
            for row in item("tr"):
                # The title row... Date, Player, Status
                if "shsTableTtlRow" in (row.get("class") or []):
                    continue

                cells = row("td")
                vals["ts"] = int(
                    timestamp_from_string(
                        cells[0].extract().text.encode("utf-8")))
                vals["player"] = cells[1].extract().text.encode("utf-8")
                vals["status"] = cells[2].extract().text.encode("utf-8")
                team_stack.append(vals.copy())

            if team_stack:
                stack.append({team: team_stack})
                team_stack = []

    out = prepare_json_output(stack)

    # Cache for 12 hours
    cache_data(data=out, timeout=60 * 60 * 12)

    return jsonify(out)
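# Sketch of the injuries output assembled above (values illustrative):
#
#   [
#       {"boston_red_sox": [
#           {"ts": 1368072000, "player": "...", "status": "..."},
#           ...
#       ]},
#       ...
#   ]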