예제 #1
0
def test_data_frame_to_numeric():
    """Check ``categorical_to_numeric`` on a frame mixing strings and floats.

    The input frame has a four-level string column, a two-level string
    column and a float column.  The expected frame replaces the string
    levels by int64 codes (large=0, medium=1, small=2, tiny=3 and
    no=0, yes=1) and keeps the float column unchanged.
    """
    # Run lengths are shared between the labels and their expected codes.
    category_runs = [10, 5, 13, 3]
    binary_runs = [8, 23]

    category_labels = numpy.repeat(
        ["large", "small", "tiny", "medium"], category_runs)
    binary_labels = numpy.repeat(["yes", "no"], binary_runs)

    # Fixed seed keeps the numeric column reproducible across runs.
    noise = numpy.random.RandomState(0).randn(len(category_labels))

    frame_in = pandas.DataFrame({
        "a_category": category_labels,
        "a_binary": binary_labels,
        "a_number": noise.copy(),
    })

    category_codes = numpy.repeat(
        [0, 2, 3, 1], category_runs).astype(numpy.int64)
    binary_codes = numpy.repeat([1, 0], binary_runs).astype(numpy.int64)
    frame_expected = pandas.DataFrame({
        "a_category": category_codes,
        "a_binary": binary_codes,
        "a_number": noise.copy(),
    })

    frame_out = column.categorical_to_numeric(frame_in)

    tm.assert_frame_equal(frame_out, frame_expected, check_exact=True)
예제 #2
0
    def setUp(self):
        """Load WHAS500 and store the preprocessed features as a plain array.

        The features go through ``column.standardize`` (called with
        ``with_std=False``) and then ``column.categorical_to_numeric``.
        ``self.x`` holds the resulting values, ``self.columns`` the matching
        column names and ``self.y`` the outcome returned by the loader.
        """
        features, self.y = load_whas500()

        standardized = column.standardize(features, with_std=False)
        encoded = column.categorical_to_numeric(standardized)

        self.columns = encoded.columns.tolist()
        self.x = encoded.values
예제 #3
0
 def _make_whas500(with_mean=True, with_std=True, to_numeric=False):
     """Load WHAS500 and bundle it, optionally preprocessed, with its names.

     ``standardize`` is applied only when ``with_mean`` is true, and
     ``with_std`` is merely forwarded to it, so ``with_std`` has no effect
     when ``with_mean`` is false.  ``categorical_to_numeric`` is applied
     when ``to_numeric`` is true.  The returned ``DataSetWithNames`` carries
     the feature values, the outcome, the feature data frame itself and the
     column names prefixed by ``'(Intercept)'``.
     """
     features, outcome = load_whas500()

     if with_mean:
         features = standardize(features, with_std=with_std)
     if to_numeric:
         features = categorical_to_numeric(features)

     labels = ['(Intercept)']
     labels.extend(features.columns.tolist())

     return DataSetWithNames(
         x=features.values,
         y=outcome,
         names=labels,
         x_data_frame=features,
     )
예제 #4
0
    def test_bool_series(self):
        """A boolean series is expected to come back as 0/1 integers.

        Name and index of the input series must be carried over unchanged.
        """
        labels = ["Alpha", "Beta", "Gamma", "Delta", "Eta", "Mu", "Zeta"]

        flags = pandas.Series(
            [True, True, False, False, True, False, True],
            name="human",
            index=labels,
        )
        wanted = pandas.Series(
            [1, 1, 0, 0, 1, 0, 1],
            name="human",
            index=labels,
        )

        result = column.categorical_to_numeric(flags)

        tm.assert_series_equal(result, wanted, check_exact=True)
예제 #5
0
    def test_series(self):
        """A string series is expected to come back as integer codes.

        The three levels "a", "b" and "c" map to 0, 1 and 2; name and index
        of the input series must be carried over unchanged.
        """
        labels = ["Alpha", "Beta", "Gamma", "Delta", "Eta", "Mu"]

        letters = pandas.Series(
            ["a", "a", "b", "b", "b", "c"],
            name="Thr33",
            index=labels,
        )
        wanted = pandas.Series(
            [0, 0, 1, 1, 1, 2],
            name="Thr33",
            index=labels,
        )

        result = column.categorical_to_numeric(letters)

        tm.assert_series_equal(result, wanted, check_exact=True)
예제 #6
0
def whas500_sparse_data():
    """Build matching dense and sparse versions of the non-float WHAS500 columns.

    All columns that are not ``float64`` are selected and passed through
    ``categorical_to_numeric``.  The sparse counterpart is a COO matrix with
    a ``1`` at every position where the dense frame is non-zero.

    NOTE(review): every non-zero entry is stored as 1, so the two versions
    only agree if the encoded columns are 0/1 indicators -- confirm against
    the data set.

    Returns
    -------
    SparseDataSet
        Holds the dense frame (``x_dense``), the COO matrix (``x_sparse``)
        and the outcome returned by the loader (``y``).
    """
    x, y = load_whas500()
    # numpy.float64 is the type the alias numpy.float_ referred to; the
    # alias itself no longer exists in NumPy 2.0.
    x_dense = categorical_to_numeric(x.select_dtypes(exclude=[numpy.float64]))

    data = []
    index_i = []
    index_j = []
    # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
    for j, (_, col) in enumerate(x_dense.items()):
        idx = numpy.flatnonzero(col.values)
        data.extend([1] * len(idx))
        index_i.extend(idx)
        index_j.extend([j] * len(idx))

    # Pass the shape explicitly: inferring it from the largest indices would
    # drop trailing all-zero rows/columns and fails when nothing is non-zero.
    x_sparse = coo_matrix((data, (index_i, index_j)), shape=x_dense.shape)
    return SparseDataSet(x_dense=x_dense, x_sparse=x_sparse, y=y)
예제 #7
0
    def setUp(self):
        """Prepare dense and sparse versions of the non-float WHAS500 columns.

        ``self.x_dense`` holds all columns that are not ``float64`` after
        ``column.categorical_to_numeric``; ``self.x_sparse`` is a COO matrix
        with a ``1`` wherever the dense frame is non-zero; ``self.y`` is the
        outcome returned by the loader.  The final assertion verifies that
        both versions contain the same values before any test runs.
        """
        x, self.y = load_whas500()
        # numpy.float64 is the type the alias numpy.float_ referred to; the
        # alias itself no longer exists in NumPy 2.0.
        self.x_dense = column.categorical_to_numeric(x.select_dtypes(exclude=[numpy.float64]))

        data = []
        index_i = []
        index_j = []
        # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
        for j, (_, col) in enumerate(self.x_dense.items()):
            idx = numpy.flatnonzero(col.values)
            data.extend([1] * len(idx))
            index_i.extend(idx)
            index_j.extend([j] * len(idx))

        # Pass the shape explicitly: inferring it from the largest indices
        # would drop trailing all-zero rows/columns.
        self.x_sparse = coo_matrix((data, (index_i, index_j)), shape=self.x_dense.shape)
        assert_array_equal(self.x_dense.values, self.x_sparse.toarray())
예제 #8
0
 def setUp(self):
     """Load WHAS500 and store the numerically encoded features and outcome."""
     features, outcome = load_whas500()
     self.y = outcome
     self.x = categorical_to_numeric(features)