Ejemplo n.º 1
0
def test_categorical_set_categories():
    """Check that ``cat.set_categories`` matches pandas.

    Two replacement category lists are exercised: one that adds the
    category ``'d'`` and one that removes the category ``'c'``.
    """
    values = ['a', 'a', 'b', 'c', 'a']
    cat = pd.Categorical(values, categories=['a', 'b', 'c'])
    pandas_series = pd.Series(cat)
    gdf_series = Series.from_categorical(cat)

    replacement_sets = (
        ('a', 'b', 'c', 'd'),  # adding category
        ('a', 'b'),            # removing category
    )
    for replacement in replacement_sets:
        # Hand each call its own fresh list, as separate literals would
        expect = pandas_series.cat.set_categories(list(replacement))
        got = gdf_series.cat.set_categories(list(replacement))
        assert_eq(expect, got)
Ejemplo n.º 2
0
def test_categorical_set_categories():
    """Verify ``cat.set_categories`` against the pandas result.

    The same categorical data is wrapped in a pandas series and in a
    ``Series`` built via ``from_categorical``; both are then given a
    longer and a shorter category list and the results are compared.
    """
    cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"])
    reference = pd.Series(cat)
    under_test = Series.from_categorical(cat)

    def check(new_categories):
        # pandas result first, then the Series under test
        expect = reference.cat.set_categories(list(new_categories))
        got = under_test.cat.set_categories(list(new_categories))
        assert_eq(expect, got)

    # adding category
    check(("a", "b", "c", "d"))

    # removing category
    check(("a", "b"))
Ejemplo n.º 3
0
 def _make_dictionary_series(self):
     """Make a dictionary-encoded series from this node.

     The dictionary referenced by ``self.field_schema['dictionary']`` is
     looked up by id in ``self.schema['dictionaries']``. The ``DATA`` of
     its first column is used as the categories, and ``self.data`` is
     passed as the codes.

     Returns
     -------
     The result of ``Series.from_categorical(cat, codes=self.data)``.

     Raises
     ------
     MetadataParsingError
         If the dictionary index type is not ``'int'``, or if no
         dictionary with the referenced id is present in the schema.
     """
     assert self.is_dictionary
     # create dictionary-encoded column
     dict_meta = self.field_schema['dictionary']
     dictid = dict_meta['id']   # start from 1
     if dict_meta['indexType']['name'] != 'int':
         msg = 'non integer type index for dictionary'
         raise MetadataParsingError(msg)
     ordered = dict_meta['isOrdered']
     # find dictionary
     for dictionary in self.schema['dictionaries']:
         if dictionary['id'] == dictid:
             break
     else:
         # Without this branch a miss would silently reuse the last
         # dictionary in the list (or fail on an unbound name when the
         # list is empty), producing wrong categories.
         msg = 'dictionary id {} not found in schema'.format(dictid)
         raise MetadataParsingError(msg)
     categories = dictionary['data']['columns'][0]['DATA']
     # make dummy categorical: only the categories/ordering are taken
     # from it, the actual codes are supplied separately below
     cat = pd.Categorical([], categories=categories, ordered=ordered)
     # make the series
     return Series.from_categorical(cat, codes=self.data)
Ejemplo n.º 4
0
def test_categorical_unique_count(nelem):
    """Compare ``unique_count`` on a categorical column with pandas.

    ``nelem`` characters are drawn from ASCII letters and digits with a
    fixed seed, turned into a categorical, and the count reported by the
    gdf column is checked against the length of pandas' ``unique()``.
    """
    import string

    # create categorical series (fixed seed keeps the draw reproducible)
    np.random.seed(12)
    alphabet = list(string.ascii_letters + string.digits)
    drawn = np.random.choice(alphabet, nelem)
    pd_cat = pd.Categorical(pd.Series(drawn, dtype='category'))

    # gdf
    gdf = DataFrame()
    gdf['a'] = Series.from_categorical(pd_cat)
    actual_count = gdf['a'].unique_count()

    # pandas
    pdf = pd.DataFrame()
    pdf['a'] = pd_cat
    expected_uniques = pdf['a'].unique()

    # verify
    assert actual_count == len(expected_uniques)
Ejemplo n.º 5
0
def test_categorical_unique(num_elements):
    """Check that the unique values of a categorical column match pandas.

    ``num_elements`` characters are drawn from ASCII letters and digits
    with a fixed seed. The unique values from both the gdf column and the
    pandas column are sorted with ``np.sort`` before being compared.
    """
    import string

    # create categorical series (fixed seed keeps the draw reproducible)
    np.random.seed(12)
    alphabet = list(string.ascii_letters + string.digits)
    drawn = np.random.choice(alphabet, num_elements)
    pd_cat = pd.Categorical(pd.Series(drawn, dtype='category'))

    # gdf
    gdf = DataFrame()
    gdf['a'] = Series.from_categorical(pd_cat)
    actual_sorted = np.sort(gdf['a'].unique())

    # pandas
    pdf = pd.DataFrame()
    pdf['a'] = pd_cat
    expected_sorted = np.sort(pdf['a'].unique())

    # verify
    np.testing.assert_array_equal(expected_sorted, actual_sorted)
Ejemplo n.º 6
0
def test_categorical_value_counts(num_elements):
    """Check that ``value_counts`` on a categorical column matches pandas.

    ``num_elements`` characters are drawn from ASCII letters and digits
    with a fixed seed. Both results are converted to plain dictionaries
    (the gdf result via ``to_pandas()``) and compared for equality.
    """
    import string

    # create categorical series (fixed seed keeps the draw reproducible)
    np.random.seed(12)
    alphabet = list(string.ascii_letters + string.digits)
    drawn = np.random.choice(alphabet, num_elements)
    pd_cat = pd.Categorical(pd.Series(drawn, dtype='category'))

    # gdf
    gdf = DataFrame()
    gdf['a'] = Series.from_categorical(pd_cat)
    actual_counts = gdf['a'].value_counts()

    # pandas
    pdf = pd.DataFrame()
    pdf['a'] = pd_cat
    expected_counts = pdf['a'].value_counts()

    # verify
    expected_as_dict = expected_counts.to_dict()
    actual_as_dict = actual_counts.to_pandas().to_dict()

    assert expected_as_dict == actual_as_dict