def whas500_without_ties():
    """Load the WHAS500 data set without ties and return it as ``(x, y)``.

    The naive survival SVM resolves ties in survival time differently,
    therefore the variant of the data without ties is used.

    Returns
    -------
    tuple
        ``(x, y)`` where ``x`` is the first result of ``get_x_y`` passed
        through ``encode_categorical`` and ``y`` is the second result of
        ``get_x_y``, unchanged.
    """
    dataset = loadarff(WHAS500_NOTIES_FILE)
    # 'fstat' and 'lenfol' are split off from the remaining columns; '1' is
    # forwarded as the third argument of get_x_y (presumably the label that
    # marks an observed event -- confirm against get_x_y).
    features, y = get_x_y(dataset, ['fstat', 'lenfol'], '1')
    return encode_categorical(features), y
def test_dataframe(self):
    """Check that ``loadarff`` turns ``EXPECTED_1`` into the expected frame.

    The ARFF text is assembled from ``EXPECTED_1``, parsed through an
    in-memory stream, and compared against a hand-built data frame with two
    categorical columns (``attr_nominal`` and ``attr_nominal_spaces``).
    """
    # Function-scope import so this block does not rely on a file-level one.
    from collections import OrderedDict

    contents = "".join(EXPECTED_1)
    with StringIO(contents) as fp:
        actual_df = loadarff(fp)

    # DataFrame.from_items was deprecated in pandas 0.23 and removed in 1.0.
    # from_dict on an OrderedDict builds the same frame and keeps the column
    # order on every supported Python/pandas combination.
    expected_df = pandas.DataFrame.from_dict(OrderedDict(
        [("attr_nominal",
          pandas.Series(["water", "wine", "beer", None, "wine", "water"]).astype("category")),
         ("attr_nominal_spaces",
          pandas.Series(['"red wine"', '"hard liquor"', None, "mate",
                         '"hard liquor"', "mate"]).astype("category")),
         ]
    ))

    tm.assert_frame_equal(expected_df, actual_df, check_exact=True)
def test_loadarff_dataframe():
    """Verify that ``loadarff`` parses ``EXPECTED_1`` into the expected frame.

    The expected frame has two categorical columns built from explicit codes
    and category lists; a code of ``-1`` denotes a missing value.
    """
    with StringIO("".join(EXPECTED_1)) as stream:
        actual_df = loadarff(stream)

    nominal = pandas.Categorical.from_codes(
        [1, 2, 0, -1, 2, 1], ["beer", "water", "wine"])
    nominal_spaces = pandas.Categorical.from_codes(
        [2, 0, -1, 1, 0, 1], ['"hard liquor"', 'mate', '"red wine"'])

    # Insert the columns one at a time; OrderedDict preserves this order.
    columns = OrderedDict()
    columns["attr_nominal"] = pandas.Series(nominal)
    columns["attr_nominal_spaces"] = pandas.Series(nominal_spaces)
    expected_df = pandas.DataFrame.from_dict(columns)

    tm.assert_frame_equal(expected_df, actual_df, check_exact=True)
def setUp(self):
    """Populate ``self.x`` and ``self.y`` from the WHAS500 data without ties."""
    # The naive survival SVM resolves ties in survival time differently,
    # therefore the data without ties is used.
    dataset = loadarff(WHAS500_NOTIES_FILE)
    features, target = get_x_y(dataset, ['fstat', 'lenfol'], '1')
    self.y = target
    self.x = encode_categorical(features)