Example #1
0
def get_fangraphs_tabular_data_from_html(
        html: Union[str, bytes],
        column_name_mapper: Union[Callable, None] = None,
        known_percentages: Union[List[str], None] = None) -> pd.DataFrame:
    """Build a DataFrame from the ``rgMasterTable`` table found in ``html``.

    Column names are the text nodes found under the table's ``rgHeader``
    header cells; each body row contributes the text nodes found under its
    ``td`` cells. The first header cell and the first cell of every row are
    skipped (presumably a row-number column -- confirm against the page).

    Args:
        html: Markup to parse, as text or bytes.
        column_name_mapper: Optional callable applied to every heading string;
            its return values become the column names.
        known_percentages: Optional list forwarded to
            ``postprocessing.try_parse`` as ``known_percentages``. ``None``
            (the default) is treated as an empty list.

    Returns:
        A DataFrame with one row per ``tr`` found under the table's ``tbody``
        and the (optionally mapped) headings as columns.

    Raises:
        IndexError: If a row yields more text nodes than there are headings,
            because each value is paired with its heading by position.
    """
    # Use a fresh list per call: a literal ``[]`` default is a single object
    # shared by every call and by whatever receives it downstream.
    if known_percentages is None:
        known_percentages = []

    xpath: str = '//table[@class="rgMasterTable"]'
    html_dom = lxml.etree.HTML(html)

    headings_xpath = f"({xpath}/thead//th[contains(@class, 'rgHeader')])[position()>1]/descendant-or-self::*/text()"
    headings = html_dom.xpath(headings_xpath)

    if column_name_mapper:
        headings = [column_name_mapper(h) for h in headings]

    data_rows_xpath = f"({xpath}/tbody//tr)"
    # Evaluated relative to each row; mirrors the heading query by skipping
    # the first cell so values line up with ``headings`` by index.
    cell_text_xpath = 'td[position()>1]/descendant-or-self::*/text()'
    data_rows = [
        [
            postprocessing.try_parse(cell_text,
                                     headings[index],
                                     known_percentages=known_percentages)
            for index, cell_text in enumerate(row.xpath(cell_text_xpath))
        ]
        for row in html_dom.xpath(data_rows_xpath)
    ]

    return pd.DataFrame(data_rows, columns=headings)
Example #2
0
def test_try_parse_long_date() -> None:
    """An ISO timestamp with milliseconds and a 'Z' suffix parses to a datetime."""
    raw_timestamp = '2020-09-03T05:40:30.210Z'
    parsed = postprocessing.try_parse(raw_timestamp, 'game_dt')

    # The .210 fractional second is 210000 microseconds.
    assert parsed == datetime(2020, 9, 3, 5, 40, 30, 210000)
 def test_try_parse_percentage_column(self):
     """A plain '50' under a column named 'CS%' is expected to parse to 0.5."""
     parsed = postprocessing.try_parse('50', 'CS%')
     assert parsed == 0.5
 def test_try_parse_percentage_value(self):
     """A value written as '10%' is expected to parse to 0.1 under column 'avg'."""
     parsed = postprocessing.try_parse('10%', 'avg')
     assert parsed == 0.1
 def test_try_parse_float(self):
     """The string '1.0' is expected to parse to a value equal to 1.0."""
     parsed = postprocessing.try_parse('1.0', 'runs')
     assert parsed == 1.0
 def test_try_parse_int(self):
     """The string '1' is expected to parse to a value equal to 1."""
     parsed = postprocessing.try_parse('1', 'runs')
     assert parsed == 1
Example #7
0
def test_try_parse_null() -> None:
    """Parsing ``None`` is expected to give a value pandas treats as missing."""
    parsed = postprocessing.try_parse(None, 'runs')
    assert pd.isna(parsed)
Example #8
0
def test_try_parse_percentage_column_known() -> None:
    """'50' under column 'CS' parses to 0.5 when 'CS' is in known_percentages."""
    percentage_columns = ['CS']
    parsed = postprocessing.try_parse('50', 'CS',
                                      known_percentages=percentage_columns)
    assert parsed == 0.5
Example #9
0
def test_try_parse_float_nonstr() -> None:
    """A float passed in directly is expected to come back equal to itself."""
    parsed = postprocessing.try_parse(1.0, 'runs')
    assert parsed == 1.0
Example #10
0
def test_try_parse_float() -> None:
    """The string '1.0' is expected to parse to a value equal to 1.0."""
    parsed = postprocessing.try_parse('1.0', 'runs')
    assert parsed == 1.0
Example #11
0
def test_try_parse_int_nonstr() -> None:
    """An int passed in directly is expected to come back equal to itself."""
    parsed = postprocessing.try_parse(1, 'runs')
    assert parsed == 1
Example #12
0
def test_try_parse_int() -> None:
    """The string '1' is expected to parse to a value equal to 1."""
    parsed = postprocessing.try_parse('1', 'runs')
    assert parsed == 1
Example #13
0
def test_try_parse_date_nonstr() -> None:
    """A datetime passed in directly is expected to come back equal to itself."""
    game_date = datetime(2020, 9, 4)
    parsed = postprocessing.try_parse(game_date, 'game_dt')
    assert parsed == game_date
Example #14
0
def test_try_parse_short_date() -> None:
    """A date-only string 'YYYY-MM-DD' is expected to parse to that day's datetime."""
    parsed = postprocessing.try_parse('2020-09-04', 'game_dt')
    assert parsed == datetime(2020, 9, 4)
 def test_try_parse_int_nonstr(self):
     """An int passed in directly is expected to come back equal to itself."""
     parsed = postprocessing.try_parse(1, 'runs')
     assert parsed == 1
 def test_try_parse_date_nonstr(self):
     """A datetime passed in directly is expected to come back equal to itself."""
     game_date = datetime(2020, 9, 4)
     parsed = postprocessing.try_parse(game_date, 'game_dt')
     assert parsed == game_date