Exemple #1
0
    def test_pivot(self):
        """Pivot a long frame and check values, axis names and the Panel case."""
        long_frame = DataFrame({
            'index': ['A', 'B', 'C', 'C', 'B', 'A'],
            'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
            'values': [1., 2., 3., 3., 2., 1.]
        })
        wide = long_frame.pivot(index='index', columns='columns',
                                values='values')

        # Both output columns carry the same label -> value mapping.
        per_label = {'A': 1., 'B': 2., 'C': 3.}
        expected = DataFrame({'One': dict(per_label), 'Two': dict(per_label)})
        expected.index.name = 'index'
        expected.columns.name = 'columns'

        assert_frame_equal(wide, expected)

        # the axis names come from the pivoted column names
        self.assertEqual(wide.index.name, 'index')
        self.assertEqual(wide.columns.name, 'columns')

        # values omitted: the result has two column levels
        wide = long_frame.pivot(index='index', columns='columns')
        self.assertEqual(wide.index.name, 'index')
        self.assertEqual(wide.columns.names, (None, 'columns'))

        # several value columns: pivoting must agree with unstack()
        stacked = tm.makePanel().to_frame()
        flat = stacked.reset_index()
        assert_frame_equal(flat.pivot(index='major', columns='minor'),
                           stacked.unstack())
Exemple #2
0
    def test_pivot_periods(self):
        """Pivot on Period-valued key columns, with and without ``values``."""
        days = [pd.Period('2013-01-01', 'D'), pd.Period('2013-01-02', 'D')]
        jan, feb = pd.Period('2013-01', 'M'), pd.Period('2013-02', 'M')
        df = DataFrame({'p1': days * 2,
                        'p2': [jan, jan, feb, feb],
                        'data1': np.arange(4, dtype='int64'),
                        'data2': np.arange(4, dtype='int64')})

        row_index = pd.PeriodIndex(['2013-01-01', '2013-01-02'],
                                   name='p1', freq='D')

        # No ``values``: expect two column levels (value column, p2).
        outer = Index(['data1', 'data1', 'data2', 'data2'])
        inner = pd.PeriodIndex(['2013-01', '2013-02'] * 2,
                               name='p2', freq='M')
        expected = DataFrame([[0, 2, 0, 2], [1, 3, 1, 3]],
                             index=row_index,
                             columns=pd.MultiIndex.from_arrays([outer, inner]))
        tm.assert_frame_equal(df.pivot(index='p1', columns='p2'), expected)

        # Single value column: expect flat Period columns.
        expected = DataFrame([[0, 2], [1, 3]],
                             index=row_index,
                             columns=pd.PeriodIndex(['2013-01', '2013-02'],
                                                    name='p2', freq='M'))
        tm.assert_frame_equal(
            df.pivot(index='p1', columns='p2', values='data1'), expected)
    def test_pivot(self):
        """Check pivot output, axis-name tracking and the multi-column Panel case."""
        source = DataFrame(
            {
                "index": ["A", "B", "C", "C", "B", "A"],
                "columns": ["One", "One", "One", "Two", "Two", "Two"],
                "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0],
            }
        )
        reshaped = source.pivot(index="index", columns="columns", values="values")

        column_values = {"A": 1.0, "B": 2.0, "C": 3.0}
        expected = DataFrame({"One": dict(column_values), "Two": dict(column_values)})
        expected.index.name = "index"
        expected.columns.name = "columns"

        assert_frame_equal(reshaped, expected)

        # axis names follow the columns that were pivoted on
        self.assertEqual(reshaped.index.name, "index")
        self.assertEqual(reshaped.columns.name, "columns")

        # leaving out ``values`` gives two column levels
        reshaped = source.pivot(index="index", columns="columns")
        self.assertEqual(reshaped.index.name, "index")
        self.assertEqual(reshaped.columns.names, (None, "columns"))

        # pivoting several value columns must match unstack()
        long_panel = tm.makePanel().to_frame()
        flattened = long_panel.reset_index()
        assert_frame_equal(
            flattened.pivot(index="major", columns="minor"), long_panel.unstack()
        )
Exemple #4
0
    def test_pivot(self):
        """Pivot a long frame and verify the values and the axis names."""
        long_frame = DataFrame({
            'index': ['A', 'B', 'C', 'C', 'B', 'A'],
            'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
            'values': [1., 2., 3., 3., 2., 1.]
        })
        wide = long_frame.pivot(index='index', columns='columns',
                                values='values')

        per_label = {'A': 1., 'B': 2., 'C': 3.}
        expected = DataFrame({'One': dict(per_label), 'Two': dict(per_label)})
        expected.index.name = 'index'
        expected.columns.name = 'columns'
        tm.assert_frame_equal(wide, expected)

        # axis names are taken from the pivoted column names
        assert wide.index.name == 'index'
        assert wide.columns.name == 'columns'

        # values omitted: two column levels, the outer one unnamed
        wide = long_frame.pivot(index='index', columns='columns')
        assert wide.index.name == 'index'
        assert wide.columns.names == (None, 'columns')
    def test_pivot_index_with_nan(self):
        """Pivot frames whose key columns contain NaN / NaT (GH 3588, GH 9491).

        ``DataFrame.pivot`` is called with keyword arguments throughout:
        positional arguments were deprecated in pandas 1.5 and removed in 2.0,
        while the keyword form works on every version.
        """
        # GH 3588
        nan = np.nan
        df = DataFrame({'a': ['R1', 'R2', nan, 'R4'],
                        'b': ['C1', 'C2', 'C3', 'C4'],
                        'c': [10, 15, 17, 20]})
        result = df.pivot(index='a', columns='b', values='c')
        expected = DataFrame([[nan, nan, 17, nan], [10, nan, nan, nan],
                              [nan, 15, nan, nan], [nan, nan, nan, 20]],
                             index=Index([nan, 'R1', 'R2', 'R4'], name='a'),
                             columns=Index(['C1', 'C2', 'C3', 'C4'], name='b'))
        tm.assert_frame_equal(result, expected)
        # Swapping index and columns must give the transpose.
        tm.assert_frame_equal(df.pivot(index='b', columns='a', values='c'),
                              expected.T)

        # GH9491
        df = DataFrame({'a': pd.date_range('2014-02-01', periods=6, freq='D'),
                        'c': 100 + np.arange(6)})
        df['b'] = df['a'] - pd.Timestamp('2014-02-02')
        df.loc[1, 'a'] = df.loc[3, 'a'] = nan
        df.loc[1, 'b'] = df.loc[4, 'b'] = nan

        pv = df.pivot(index='a', columns='b', values='c')
        # Every input row must land in exactly one non-null cell.
        self.assertEqual(pv.notnull().values.sum(), len(df))

        for _, row in df.iterrows():
            self.assertEqual(pv.loc[row['a'], row['b']], row['c'])

        tm.assert_frame_equal(df.pivot(index='b', columns='a', values='c'),
                              pv.T)
Exemple #6
0
    def test_pivot_index_none(self):
        """Pivot without ``index`` so the frame's existing index is used (gh-3962)."""
        indexed = DataFrame({
            'index': ['A', 'B', 'C', 'C', 'B', 'A'],
            'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
            'values': [1., 2., 3., 3., 2., 1.]
        }).set_index('index')

        per_label = {'A': 1., 'B': 2., 'C': 3.}
        expected = DataFrame({'One': dict(per_label), 'Two': dict(per_label)})
        expected.index.name = 'index'
        expected.columns.name = 'columns'

        result = indexed.pivot(columns='columns', values='values')
        assert_frame_equal(result, expected)

        # values omitted as well: expect an extra, unnamed outer column level
        result = indexed.pivot(columns='columns')
        expected.columns = pd.MultiIndex.from_tuples(
            [('values', 'One'), ('values', 'Two')],
            names=[None, 'columns'])
        expected.index.name = 'index'
        tm.assert_frame_equal(result, expected, check_names=False)
        assert result.index.name == 'index'
        assert result.columns.names == (None, 'columns')

        # drop the outer level again and repeat the explicit-values pivot
        expected.columns = expected.columns.droplevel(0)
        expected.columns.name = 'columns'
        result = indexed.pivot(columns='columns', values='values')
        tm.assert_frame_equal(result, expected)
Exemple #7
0
    def test_pivot(self):
        """Pivot a long frame; check values, axis names and the Panel case."""
        long_frame = DataFrame({
            'index': ['A', 'B', 'C', 'C', 'B', 'A'],
            'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
            'values': [1., 2., 3., 3., 2., 1.]
        })
        wide = long_frame.pivot(index='index', columns='columns',
                                values='values')

        per_label = {'A': 1., 'B': 2., 'C': 3.}
        expected = DataFrame({'One': dict(per_label), 'Two': dict(per_label)})
        expected.index.name = 'index'
        expected.columns.name = 'columns'
        tm.assert_frame_equal(wide, expected)

        # axis names are taken from the pivoted column names
        assert wide.index.name == 'index'
        assert wide.columns.name == 'columns'

        # values omitted: two column levels, the outer one unnamed
        wide = long_frame.pivot(index='index', columns='columns')
        assert wide.index.name == 'index'
        assert wide.columns.names == (None, 'columns')

        with catch_warnings(record=True):
            # FutureWarnings raised inside this block are ignored
            simplefilter("ignore", FutureWarning)
            # several value columns: pivoting must agree with unstack()
            stacked = tm.makePanel().to_frame()
            flat = stacked.reset_index()
            tm.assert_frame_equal(
                flat.pivot(index='major', columns='minor'), stacked.unstack())
Exemple #8
0
    def test_pivot(self):
        """Pivot a long frame and verify the values and the axis names."""
        source = DataFrame(
            {
                "index": ["A", "B", "C", "C", "B", "A"],
                "columns": ["One", "One", "One", "Two", "Two", "Two"],
                "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0],
            }
        )
        reshaped = source.pivot(index="index", columns="columns", values="values")

        column_values = {"A": 1.0, "B": 2.0, "C": 3.0}
        expected = DataFrame({"One": dict(column_values), "Two": dict(column_values)})
        expected.index.name = "index"
        expected.columns.name = "columns"
        tm.assert_frame_equal(reshaped, expected)

        # axis names follow the columns that were pivoted on
        assert reshaped.index.name == "index"
        assert reshaped.columns.name == "columns"

        # leaving out ``values`` gives two column levels
        reshaped = source.pivot(index="index", columns="columns")
        assert reshaped.index.name == "index"
        assert reshaped.columns.names == (None, "columns")
Exemple #9
0
    def test_pivot_index_with_nan(self):
        """Pivot frames whose key columns contain NaN / NaT (GH 3588, GH 9491).

        ``DataFrame.pivot`` is called with keyword arguments throughout:
        positional arguments were deprecated in pandas 1.5 and removed in 2.0,
        while the keyword form works on every version.
        """
        # GH 3588
        nan = np.nan
        df = DataFrame({
            'a': ['R1', 'R2', nan, 'R4'],
            'b': ['C1', 'C2', 'C3', 'C4'],
            'c': [10, 15, 17, 20]
        })
        result = df.pivot(index='a', columns='b', values='c')
        expected = DataFrame([[nan, nan, 17, nan], [10, nan, nan, nan],
                              [nan, 15, nan, nan], [nan, nan, nan, 20]],
                             index=Index([nan, 'R1', 'R2', 'R4'], name='a'),
                             columns=Index(['C1', 'C2', 'C3', 'C4'], name='b'))
        tm.assert_frame_equal(result, expected)
        # Swapping index and columns must give the transpose.
        tm.assert_frame_equal(df.pivot(index='b', columns='a', values='c'),
                              expected.T)

        # GH9491
        df = DataFrame({
            'a': pd.date_range('2014-02-01', periods=6, freq='D'),
            'c': 100 + np.arange(6)
        })
        df['b'] = df['a'] - pd.Timestamp('2014-02-02')
        df.loc[1, 'a'] = df.loc[3, 'a'] = nan
        df.loc[1, 'b'] = df.loc[4, 'b'] = nan

        pv = df.pivot(index='a', columns='b', values='c')
        # Every input row must land in exactly one non-null cell.
        self.assertEqual(pv.notnull().values.sum(), len(df))

        for _, row in df.iterrows():
            self.assertEqual(pv.loc[row['a'], row['b']], row['c'])

        tm.assert_frame_equal(df.pivot(index='b', columns='a', values='c'),
                              pv.T)
Exemple #10
0
    def test_pivot_index_none(self):
        """Pivot without ``index`` so the frame's existing index is used (gh-3962)."""
        indexed = DataFrame(
            {
                "index": ["A", "B", "C", "C", "B", "A"],
                "columns": ["One", "One", "One", "Two", "Two", "Two"],
                "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0],
            }
        ).set_index("index")

        column_values = {"A": 1.0, "B": 2.0, "C": 3.0}
        expected = DataFrame({"One": dict(column_values), "Two": dict(column_values)})
        expected.index.name = "index"
        expected.columns.name = "columns"

        result = indexed.pivot(columns="columns", values="values")
        tm.assert_frame_equal(result, expected)

        # values omitted as well: expect an extra, unnamed outer column level
        result = indexed.pivot(columns="columns")
        two_level = pd.MultiIndex.from_tuples(
            [("values", "One"), ("values", "Two")], names=[None, "columns"]
        )
        expected.columns = two_level
        expected.index.name = "index"
        tm.assert_frame_equal(result, expected, check_names=False)
        assert result.index.name == "index"
        assert result.columns.names == (None, "columns")

        # drop the outer level again and repeat the explicit-values pivot
        expected.columns = expected.columns.droplevel(0)
        expected.columns.name = "columns"
        result = indexed.pivot(columns="columns", values="values")
        tm.assert_frame_equal(result, expected)
Exemple #11
0
def preprocess(df: pd.DataFrame):
    """Pivot level-2 order-book rows into one wide feature row per ``datetime``.

    Rows sharing a ``datetime`` are numbered 1..k in their existing order; the
    number is stored in a ``num`` column that is added to ``df`` in place.
    ``price``, ``bid_vol`` and ``ask_vol`` are each pivoted on that number, and
    every price level is then expressed relative to ``base``, the mean of
    price levels 10 and 11.

    Parameters
    ----------
    df : pd.DataFrame
        Long frame with ``datetime``, ``price``, ``bid_vol`` and ``ask_vol``
        columns. It must provide at least 11 rows per ``datetime`` for the
        ``price10`` / ``price11`` lookup to succeed.

    Returns
    -------
    pd.DataFrame
        Indexed by ``datetime`` with columns ``price<n>``, ``base``,
        ``bid_vol<n>`` and ``as_vol<n>``.

    Raises
    ------
    KeyError
        If price level 10 or 11 is absent.
    ValueError
        Raised by ``DataFrame.pivot`` on duplicate ``(datetime, num)`` pairs.
    """
    df["num"] = df.groupby("datetime").cumcount() + 1

    def pivot_levels(values, prefix):
        # One column per level number, named "<prefix><num>".
        wide = df.pivot(index="datetime", columns="num", values=values)
        wide.columns = prefix + wide.columns.astype(str)
        return wide

    price_pivoted = pivot_levels("price", "price")
    # Captured before "base" is added so only the level columns are shifted.
    price_cols = list(price_pivoted.columns)
    price_pivoted["base"] = (price_pivoted["price10"] +
                             price_pivoted["price11"]) / 2
    for col in price_cols:
        price_pivoted[col] = price_pivoted[col] - price_pivoted["base"]

    bid_vol_pivoted = pivot_levels("bid_vol", "bid_vol")
    # NOTE(review): the "as_vol" prefix looks like a typo for "ask_vol"; kept
    # unchanged because downstream code may already rely on these names.
    ask_vol_pivoted = pivot_levels("ask_vol", "as_vol")

    return price_pivoted.join(bid_vol_pivoted).join(ask_vol_pivoted)
Exemple #12
0
    def test_pivot(self):
        """Pivot a long frame; check values, axis names and the Panel case."""
        long_frame = DataFrame({
            'index': ['A', 'B', 'C', 'C', 'B', 'A'],
            'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
            'values': [1., 2., 3., 3., 2., 1.]
        })
        wide = long_frame.pivot(index='index', columns='columns',
                                values='values')

        per_label = {'A': 1., 'B': 2., 'C': 3.}
        expected = DataFrame({'One': dict(per_label), 'Two': dict(per_label)})
        expected.index.name = 'index'
        expected.columns.name = 'columns'
        tm.assert_frame_equal(wide, expected)

        # axis names are taken from the pivoted column names
        assert wide.index.name == 'index'
        assert wide.columns.name == 'columns'

        # values omitted: two column levels, the outer one unnamed
        wide = long_frame.pivot(index='index', columns='columns')
        assert wide.index.name == 'index'
        assert wide.columns.names == (None, 'columns')

        with catch_warnings(record=True):
            # several value columns: pivoting must agree with unstack()
            stacked = tm.makePanel().to_frame()
            flat = stacked.reset_index()
            tm.assert_frame_equal(
                flat.pivot(index='major', columns='minor'), stacked.unstack())
Exemple #13
0
    def test_pivot_index_none(self):
        """Pivot without ``index`` so the frame's existing index is used (gh-3962)."""
        indexed = DataFrame({
            'index': ['A', 'B', 'C', 'C', 'B', 'A'],
            'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
            'values': [1., 2., 3., 3., 2., 1.]
        }).set_index('index')

        per_label = {'A': 1., 'B': 2., 'C': 3.}
        expected = DataFrame({'One': dict(per_label), 'Two': dict(per_label)})
        expected.index.name = 'index'
        expected.columns.name = 'columns'

        result = indexed.pivot(columns='columns', values='values')
        assert_frame_equal(result, expected)

        # values omitted as well: expect an extra, unnamed outer column level
        result = indexed.pivot(columns='columns')
        expected.columns = pd.MultiIndex.from_tuples(
            [('values', 'One'), ('values', 'Two')],
            names=[None, 'columns'])
        expected.index.name = 'index'
        tm.assert_frame_equal(result, expected, check_names=False)
        assert result.index.name == 'index'
        assert result.columns.names == (None, 'columns')

        # drop the outer level again and repeat the explicit-values pivot
        expected.columns = expected.columns.droplevel(0)
        expected.columns.name = 'columns'
        result = indexed.pivot(columns='columns', values='values')
        tm.assert_frame_equal(result, expected)
Exemple #14
0
def _show_pivot_bars(table, title, legend):
    """Print ``table`` and show it as a bar chart with the x-axis label hidden."""
    print(table)
    # Use the Axes returned by the plot call: a bare ``plt.axes()`` adds a new
    # axes in current matplotlib, so the label would be hidden on the wrong one.
    ax = table.plot.bar(legend=legend, ylim=(0.8, 1.0), figsize=(10, 3),
                        title=title)
    ax.get_xaxis().get_label().set_visible(False)
    plt.show()


def bar(df: pd.DataFrame, legend=False, values="recall", is_fs=True, is_class=True):
    """Show bar charts of one metric per classifier and/or per feature set.

    Parameters
    ----------
    df : pd.DataFrame
        Long frame with ``clf`` and ``features`` columns plus the metric
        column named by ``values``; each (clf, features) pair must be unique
        for the pivot to succeed.
    legend : bool
        Passed through to ``DataFrame.plot.bar``.
    values : str
        Name of the metric column to plot; also used in the chart titles.
    is_fs : bool
        If true, plot feature sets on the x axis with one bar per classifier.
    is_class : bool
        If true, plot classifiers on the x axis with one bar per feature set.

    Each requested table is printed before its chart is shown. The y axis is
    fixed to the range 0.8-1.0.
    """
    if is_class:
        by_classifier = df.pivot(index="clf", columns="features", values=values)
        _show_pivot_bars(by_classifier,
                         "{} : classifiers / feature sets".format(values),
                         legend)

    if is_fs:
        by_feature_set = df.pivot(index="features", columns="clf", values=values)
        _show_pivot_bars(by_feature_set,
                         "{} : feature sets / classifiers".format(values),
                         legend)
    def test_pivot_index_none(self):
        """Pivot without ``index`` so the frame's existing index is used (gh-3962).

        Covers both the explicit-``values`` call (flat columns) and the call
        without ``values`` (an extra, unnamed outer column level).
        """
        # gh-3962
        data = {
            "index": ["A", "B", "C", "C", "B", "A"],
            "columns": ["One", "One", "One", "Two", "Two", "Two"],
            "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0],
        }

        frame = DataFrame(data).set_index("index")
        result = frame.pivot(columns="columns", values="values")
        expected = DataFrame({"One": {"A": 1.0, "B": 2.0, "C": 3.0}, "Two": {"A": 1.0, "B": 2.0, "C": 3.0}})

        expected.index.name, expected.columns.name = "index", "columns"
        assert_frame_equal(result, expected)

        # omit values
        result = frame.pivot(columns="columns")

        expected.columns = pd.MultiIndex.from_tuples([("values", "One"), ("values", "Two")], names=[None, "columns"])
        expected.index.name = "index"
        assert_frame_equal(result, expected, check_names=False)
        self.assertEqual(result.index.name, "index")
        self.assertEqual(result.columns.names, (None, "columns"))

        # Back to flat columns. A second ``data`` dict used to be rebuilt here
        # but was never read, so it has been dropped.
        expected.columns = expected.columns.droplevel(0)
        result = frame.pivot(columns="columns", values="values")

        expected.columns.name = "columns"
        assert_frame_equal(result, expected)
Exemple #16
0
    def test_pivot_index_with_nan(self):
        """Pivot frames whose key columns contain NaN / NaT (GH 3588, GH 9491).

        ``DataFrame.pivot`` is called with keyword arguments throughout:
        positional arguments were deprecated in pandas 1.5 and removed in 2.0,
        while the keyword form works on every version.
        """
        # GH 3588
        nan = np.nan
        df = DataFrame({"a": ["R1", "R2", nan, "R4"], "b": ["C1", "C2", "C3", "C4"], "c": [10, 15, 17, 20]})
        result = df.pivot(index="a", columns="b", values="c")
        expected = DataFrame(
            [[nan, nan, 17, nan], [10, nan, nan, nan], [nan, 15, nan, nan], [nan, nan, nan, 20]],
            index=Index([nan, "R1", "R2", "R4"], name="a"),
            columns=Index(["C1", "C2", "C3", "C4"], name="b"),
        )
        tm.assert_frame_equal(result, expected)
        # Swapping index and columns must give the transpose.
        tm.assert_frame_equal(df.pivot(index="b", columns="a", values="c"), expected.T)

        # GH9491
        df = DataFrame({"a": pd.date_range("2014-02-01", periods=6, freq="D"), "c": 100 + np.arange(6)})
        df["b"] = df["a"] - pd.Timestamp("2014-02-02")
        df.loc[1, "a"] = df.loc[3, "a"] = nan
        df.loc[1, "b"] = df.loc[4, "b"] = nan

        pv = df.pivot(index="a", columns="b", values="c")
        # Every input row must land in exactly one non-null cell.
        self.assertEqual(pv.notnull().values.sum(), len(df))

        for _, row in df.iterrows():
            self.assertEqual(pv.loc[row["a"], row["b"]], row["c"])

        tm.assert_frame_equal(df.pivot(index="b", columns="a", values="c"), pv.T)
Exemple #17
0
 def test_pivot_duplicates(self):
     """Pivoting must raise when an (index, columns) pair occurs more than once."""
     # ("foo", "one") appears twice, so the reshape is ambiguous.
     data = DataFrame({
         "a": ["bar", "bar", "foo", "foo", "foo"],
         "b": ["one", "two", "one", "one", "two"],
         "c": [1.0, 2.0, 3.0, 3.0, 4.0],
     })
     with pytest.raises(ValueError, match="duplicate entries"):
         # Keywords: positional arguments to pivot were removed in pandas 2.0.
         data.pivot(index="a", columns="b", values="c")
Exemple #18
0
 def test_pivot_duplicates(self):
     """Pivoting must raise when an (index, columns) pair occurs more than once."""
     # ('foo', 'one') appears twice, so the reshape is ambiguous.
     data = DataFrame({
         'a': ['bar', 'bar', 'foo', 'foo', 'foo'],
         'b': ['one', 'two', 'one', 'one', 'two'],
         'c': [1., 2., 3., 3., 4.]
     })
     with pytest.raises(ValueError, match='duplicate entries'):
         # Keywords: positional arguments to pivot were removed in pandas 2.0.
         data.pivot(index='a', columns='b', values='c')
Exemple #19
0
 def test_pivot_duplicates(self):
     """Pivoting must raise when an (index, columns) pair occurs more than once."""
     # ('foo', 'one') appears twice in the key columns.
     key_a = ['bar', 'bar', 'foo', 'foo', 'foo']
     key_b = ['one', 'two', 'one', 'one', 'two']
     payload = [1., 2., 3., 3., 4.]
     frame = DataFrame({'a': key_a, 'b': key_b, 'c': payload})
     with tm.assert_raises_regex(ValueError, 'duplicate entries'):
         frame.pivot(index='a', columns='b', values='c')
 def test_pivot_duplicates(self):
     """Pivoting must raise when an (index, columns) pair occurs more than once."""
     # ("foo", "one") appears twice in the key columns.
     key_a = ["bar", "bar", "foo", "foo", "foo"]
     key_b = ["one", "two", "one", "one", "two"]
     payload = [1.0, 2.0, 3.0, 3.0, 4.0]
     frame = DataFrame({"a": key_a, "b": key_b, "c": payload})
     with assertRaisesRegexp(ValueError, "duplicate entries"):
         frame.pivot(index="a", columns="b", values="c")
Exemple #21
0
class Pivot:
    """Benchmark: pivot a long time-series frame into wide form."""

    def setup(self):
        """Build a long frame: 50 variables, each at the same 10000 hourly stamps."""
        N = 10000
        index = date_range('1/1/2000', periods=N, freq='h')
        data = {'value': np.random.randn(N * 50),
                'variable': np.arange(50).repeat(N),
                'date': np.tile(index.values, 50)}
        self.df = DataFrame(data)

    def time_reshape_pivot_time_series(self):
        """Pivot to one row per ``date`` and one column per ``variable``."""
        # Keyword arguments: positional arguments to DataFrame.pivot were
        # removed in pandas 2.0; the keyword form works on every version.
        self.df.pivot(index='date', columns='variable', values='value')
Exemple #22
0
class Pivot(object):
    """Benchmark: pivot a long time-series frame into wide form."""

    def setup(self):
        """Build a long frame: 50 variables, each at the same 10000 hourly stamps."""
        N = 10000
        index = date_range('1/1/2000', periods=N, freq='h')
        data = {'value': np.random.randn(N * 50),
                'variable': np.arange(50).repeat(N),
                'date': np.tile(index.values, 50)}
        self.df = DataFrame(data)

    def time_reshape_pivot_time_series(self):
        """Pivot to one row per ``date`` and one column per ``variable``."""
        # Keyword arguments: positional arguments to DataFrame.pivot were
        # removed in pandas 2.0; the keyword form works on every version.
        self.df.pivot(index='date', columns='variable', values='value')
Exemple #23
0
 def test_pivot_index_with_nan(self):
     """Pivot a frame whose index key column contains NaN (GH 3588).

     ``DataFrame.pivot`` is called with keyword arguments: positional
     arguments were deprecated in pandas 1.5 and removed in 2.0.
     """
     # GH 3588
     nan = np.nan
     df = DataFrame({'a': ['R1', 'R2', nan, 'R4'],
                     'b': ['C1', 'C2', 'C3', 'C4'],
                     'c': [10, 15, 17, 20]})
     result = df.pivot(index='a', columns='b', values='c')
     expected = DataFrame([[nan, nan, 17, nan], [10, nan, nan, nan],
                           [nan, 15, nan, nan], [nan, nan, nan, 20]],
                          index=Index([nan, 'R1', 'R2', 'R4'], name='a'),
                          columns=Index(['C1', 'C2', 'C3', 'C4'], name='b'))
     tm.assert_frame_equal(result, expected)
     # Swapping index and columns must give the transpose.
     tm.assert_frame_equal(df.pivot(index='b', columns='a', values='c'),
                           expected.T)
Exemple #24
0
class Pivot:
    """Benchmark: pivot a long time-series frame into wide form."""

    def setup(self):
        """Build a long frame: 50 variables, each at the same 10000 hourly stamps."""
        N = 10000
        index = date_range("1/1/2000", periods=N, freq="h")
        data = {
            "value": np.random.randn(N * 50),
            "variable": np.arange(50).repeat(N),
            "date": np.tile(index.values, 50),
        }
        self.df = DataFrame(data)

    def time_reshape_pivot_time_series(self):
        """Pivot to one row per ``date`` and one column per ``variable``."""
        # Keyword arguments: positional arguments to DataFrame.pivot were
        # removed in pandas 2.0; the keyword form works on every version.
        self.df.pivot(index="date", columns="variable", values="value")
Exemple #25
0
    def test_pivot_with_tz(self):
        """Pivot on tz-aware datetime key columns (GH 5878).

        The expected row and column indexes carry the ``US/Pacific`` and
        ``Asia/Tokyo`` time zones of the two key columns.
        """
        first_keys = [datetime(2013, 1, 1, 9, 0), datetime(2013, 1, 2, 9, 0)]
        early, late = datetime(2014, 1, 1, 9, 0), datetime(2014, 1, 2, 9, 0)
        df = DataFrame({
            'dt1': first_keys * 2,
            'dt2': [early, early, late, late],
            'data1': np.arange(4, dtype='int64'),
            'data2': np.arange(4, dtype='int64'),
        })

        # Localise each key column to its own time zone.
        df['dt1'] = df['dt1'].apply(lambda d: pd.Timestamp(d, tz='US/Pacific'))
        df['dt2'] = df['dt2'].apply(lambda d: pd.Timestamp(d, tz='Asia/Tokyo'))

        row_labels = ['2013/01/01 09:00', '2013/01/02 09:00']
        col_labels = ['2014/01/01 09:00', '2014/01/02 09:00']

        # No ``values``: expect two column levels (value column, dt2).
        outer = Index(['data1', 'data1', 'data2', 'data2'])
        inner = pd.DatetimeIndex(col_labels * 2, name='dt2', tz='Asia/Tokyo')
        expected = DataFrame(
            [[0, 2, 0, 2], [1, 3, 1, 3]],
            index=pd.DatetimeIndex(row_labels, name='dt1', tz='US/Pacific'),
            columns=pd.MultiIndex.from_arrays([outer, inner]))
        tm.assert_frame_equal(df.pivot(index='dt1', columns='dt2'), expected)

        # Single value column: expect flat tz-aware columns.
        expected = DataFrame(
            [[0, 2], [1, 3]],
            index=pd.DatetimeIndex(row_labels, name='dt1', tz='US/Pacific'),
            columns=pd.DatetimeIndex(col_labels, name='dt2', tz='Asia/Tokyo'))
        tm.assert_frame_equal(
            df.pivot(index='dt1', columns='dt2', values='data1'), expected)
Exemple #26
0
 def test_pivot_index_with_nan(self):
     """Pivot a frame whose index key column contains NaN (GH 3588).

     ``DataFrame.pivot`` is called with keyword arguments: positional
     arguments were deprecated in pandas 1.5 and removed in 2.0.
     """
     # GH 3588
     nan = np.nan
     df = DataFrame({
         'a': ['R1', 'R2', nan, 'R4'],
         'b': ['C1', 'C2', 'C3', 'C4'],
         'c': [10, 15, 17, 20]
     })
     result = df.pivot(index='a', columns='b', values='c')
     expected = DataFrame([[nan, nan, 17, nan], [10, nan, nan, nan],
                           [nan, 15, nan, nan], [nan, nan, nan, 20]],
                          index=Index([nan, 'R1', 'R2', 'R4'], name='a'),
                          columns=Index(['C1', 'C2', 'C3', 'C4'], name='b'))
     tm.assert_frame_equal(result, expected)
     # Swapping index and columns must give the transpose.
     tm.assert_frame_equal(df.pivot(index='b', columns='a', values='c'),
                           expected.T)
Exemple #27
0
    def pipe_age_filter_entries(self, df: pd.DataFrame) -> pd.DataFrame:
        """Keep only the rows whose age breakdown is deemed reliable.

        Two filters are applied:

        1. Per (date, location), the ``ALL`` age group is compared with the sum
           of the other age groups. Entries are dropped when ``ALL`` exceeds
           that sum by more than 5% of ``ALL``. The comparison is signed, so
           entries where the sum exceeds ``ALL`` are kept.
        2. Rows where ``UnknownDose`` is 5% or more of ``total_vaccinations``
           are dropped.

        Raises:
            ValueError: if the surviving (date, location) pairs are not unique.
        """
        # One row per (date, location), one column per age group.
        wide = df.pivot(
            index=["date", "location"],
            columns="age_group",
            values="total_vaccinations",
        ).reset_index()
        wide = wide.dropna(subset=AGE_GROUPS_MUST_HAVE, how="any")

        # Under-age total: the lvl0 column where present, otherwise the
        # row-wise sum of the lvl1 columns.
        underage_fallback = wide[AGE_GROUP_UNDERAGE_LEVELS["lvl1"]].sum(axis=1)
        wide = wide.assign(
            debug_u18=wide[AGE_GROUP_UNDERAGE_LEVELS["lvl0"]].fillna(underage_fallback)
        )
        # Sum over all ages, to be compared with the reported ALL figure.
        wide = wide.assign(debug=wide[AGE_GROUPS_MUST_HAVE].sum(axis=1) + wide.debug_u18)
        mismatch = wide.ALL - wide.debug
        wide = wide.assign(debug_diff=mismatch, debug_diff_perc=mismatch / wide.ALL)

        max_mismatch_ratio = 0.05
        wide = wide[wide.debug_diff_perc <= max_mismatch_ratio]
        valid_keys = wide[["date", "location"]]
        if valid_keys.value_counts().max() != 1:
            raise ValueError("Some entries appear to be duplicated")
        df = df.merge(valid_keys, on=["date", "location"])

        # Drop rows where too large a share of the doses is unknown.
        max_unknown_ratio = 0.05
        unknown_share = df.UnknownDose / df.total_vaccinations
        return df[unknown_share < max_unknown_ratio]
def print_survival_rate(df):
    """Plot the survival rate of each algorithm against action duration.

    For every ``domainPath`` group, and within it every
    (``algorithmName``, ``actionDuration``) group, the survival rate is
    ``successes / (successes + deaths)``. A success is a row without an
    ``errorMessage``; a death is a row whose ``errorMessage`` is anything other
    than ``"Timeout"``. Timeouts count towards the number of trials used for
    the confidence interval but not towards the rate itself.

    The error bars are printed, and each domain's figure is written to
    ``test.png``, so later domains overwrite earlier ones.

    Parameters
    ----------
    df : DataFrame
        Needs ``domainPath``, ``algorithmName``, ``actionDuration`` and
        ``errorMessage`` columns.
    """
    # Group by the bare column name: grouping by a one-element list yields
    # 1-tuples as keys in pandas >= 2.0.
    for _, domain_group in df.groupby("domainPath"):
        survival_results = DataFrame(columns="actionDuration algorithmName survival lbound rbound".split())

        for (algorithm, duration), action_group in domain_group.groupby(['algorithmName', 'actionDuration']):
            total_trials = len(action_group)
            has_error = action_group["errorMessage"].notnull()
            error_experiments = action_group[has_error]

            deaths = len(error_experiments[error_experiments["errorMessage"] != "Timeout"])
            successes = len(action_group[~has_error])

            survival_confint = proportion_confint(successes, total_trials, 0.05)
            finished = successes + deaths
            # A group made up solely of timeouts has no defined survival rate.
            survival_rate = successes / finished if finished else float("nan")
            survival_results = add_row(survival_results,
                                       [duration, algorithm, survival_rate, survival_confint[0], survival_confint[1]])

        fig, ax = plt.subplots()
        errors = []
        for alg, alg_group in survival_results.groupby('algorithmName'):
            # Distances from the rate to the lower and upper confidence bounds.
            errors.append([(alg_group['lbound'] - alg_group['survival']).values,
                           (alg_group['rbound'].values - alg_group['survival']).values])
        errors = np.abs(errors)
        print(errors)
        survival = survival_results.pivot(index='actionDuration', columns='algorithmName', values='survival')

        survival.plot(ax=ax, yerr=errors,
                      xlim=[0, 7000], ylim=[0, 1.0],
                      capsize=4, capthick=1, ecolor='black', cmap=plt.get_cmap("rainbow"), elinewidth=1)

        plt.savefig('test.png', format='png')
def main():
    """Plot goal-achievement time relative to A* for each domain.

    Reads ``../output/results.json``, drops the columns that are not needed
    and the runs that ended with an error, then joins every run with the
    A_STAR run for the same domain, seed and action duration.
    ``withinOpt`` is ``goalAchievementTime`` divided by the A* reference
    ``actionDuration * pathLength``.

    For each ``domainPath``, the mean ``withinOpt`` per algorithm and action
    duration is plotted with confidence-interval error bars. The figure is
    saved as ``<domain name>.png``, where the domain name is the last path
    component without a trailing ``.track``.
    """
    data = construct_data_frame(read_data("../output/results.json"))
    set_rc()

    data.drop(['commitmentType', "success", "timeLimit",
               "terminationType", 'timestamp', 'octileMovement', 'lookaheadType',
               'firstIterationDuration', 'generatedNodes', 'expandedNodes', 'domainInstanceName', 'domain_name',
               'planningTime'],
              axis=1,
              inplace=True,
              errors='ignore')

    # this is a fix for the traffic domain which does not have domainSeed values, so I have to fake it
    if 'domainSeed' not in data:
        data['domainSeed'] = data['domainPath']
        data['domainPath'] = 'vehicle'

    sns.set_style("white")

    # print_survival_rate(data)
    # Keep successful runs only. sort_values returns a new frame, so the
    # filtered selection is never modified in place.
    data = data[data['errorMessage'].isnull()].sort_values(
        ['domainPath', 'actionDuration'], ascending=True)

    # A* reference per (domainPath, domainSeed, actionDuration); copy before
    # adding a column so the write does not target a slice of ``data``.
    astar = data[data["algorithmName"] == "A_STAR"].copy()
    astar["opt"] = astar["actionDuration"] * astar["pathLength"]
    astar = astar[["domainPath", "domainSeed", "opt", "actionDuration"]]
    data = pd.merge(data, astar, how='inner', on=['domainPath', 'domainSeed', 'actionDuration'])
    data["withinOpt"] = data["goalAchievementTime"] / data["opt"]

    track_suffix = ".track"
    # Group by the bare column name: grouping by a one-element list yields
    # 1-tuples as keys in pandas >= 2.0, which re.search cannot handle.
    for domain_path, domain_group in data.groupby("domainPath"):
        results = DataFrame(columns="actionDuration algorithmName withinOpt lbound rbound".split())
        domain_name = re.search("[^/]+$", domain_path).group(0)
        # Remove the suffix itself: str.rstrip(".track") strips any trailing
        # run of those characters and would also eat into the name.
        if domain_name.endswith(track_suffix):
            domain_name = domain_name[:-len(track_suffix)]

        for (algorithm, duration), action_group in domain_group.groupby(['algorithmName', 'actionDuration']):
            bound = sms.DescrStatsW(action_group["withinOpt"]).tconfint_mean()
            mean = action_group["withinOpt"].mean()
            results = add_row(results, [duration, algorithm, mean, abs(mean - bound[0]), abs(mean - bound[1])])

        fig, ax = plt.subplots()
        errors = []
        for alg, alg_group in results.groupby('algorithmName'):
            errors.append([alg_group['lbound'].values, alg_group['rbound'].values])

        pivot = results.pivot(index='actionDuration', columns='algorithmName', values='withinOpt')
        plot = pivot.plot(ax=ax, yerr=errors,
                          capsize=4, capthick=1, ecolor='black', cmap=plt.get_cmap("rainbow"), elinewidth=1)
        plot.legend(title="Planners", shadow=True, frameon=True, framealpha=1.0, facecolor='lightgrey')

        format_plot(plot)
        plt.savefig(domain_name + ".png", format='png')
    def _create_heatmap_df(
        self,
        df: pd.DataFrame,
        lookback: int = 5,
        query: Union[str, dict] = None,
        fill_null_days: bool = False,
    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Generates the dataframes needed to plot a calendar heatmap

        Two dataframes are produced: ``df_annot`` holds the ResponseCount of
        each model per date (rows are ModelID, columns are Date), and
        ``df_sign`` is derived from it by ``_create_sign_df``.
        If there are multiple snapshots per model and day, only the rows with
        the latest SnapshotTime are kept.

        Parameters
        ----------
        df : pd.DataFrame
            Input data; after the query is applied it must contain the columns
            ModelID, SnapshotTime and ResponseCount
        lookback : int
            Defines how many days to look back at data from the last snapshot;
            only dates strictly later than (latest date - lookback days) are kept
        query : Union[str, dict]
            The query to supply to _apply_query
            If a string, uses the default Pandas query function
            Else, a dict of lists where the key is column name in the dataframe
            and the corresponding value is a list of values to keep in the dataframe
        fill_null_days : bool
            If True, null values will be generated in the dataframe for
            days where there is no model snapshot

        Returns
        -------
        Tuple[pd.DataFrame, pd.DataFrame]
            Tuple of annotate and sign dataframes

        Raises
        ------
        AssertionError
            If one of the required columns is missing
        ValueError
            If no rows fall within the lookback range
        """

        df = self._apply_query(df, query)
        required_columns = {"ModelID", "SnapshotTime", "ResponseCount"}
        assert required_columns.issubset(df.columns)

        # Sort chronologically and reset to a 0..n-1 index so that the Date
        # series built below lines up row by row.
        df = (df[["ModelID", "SnapshotTime", "ResponseCount"
                  ]].sort_values("SnapshotTime").reset_index(drop=True))
        df["Date"] = pd.Series([i.date() for i in df["SnapshotTime"]])
        df = df[df["Date"] > (df["Date"].max() - timedelta(lookback))]
        if df.shape[0] < 1:
            raise ValueError(("No data within lookback range"))

        # Boolean mask: True where the row carries the latest SnapshotTime of
        # its (ModelID, Date) group.
        idx = (df.groupby(
            ["ModelID",
             "Date"])["SnapshotTime"].transform(max) == df["SnapshotTime"])
        df = df[idx]
        if fill_null_days:
            # Reindex every model onto the full daily range between the first
            # and last date.
            # NOTE(review): "Date" holds datetime.date objects while idx_date
            # is a DatetimeIndex, and ModelID may end up both as an index
            # level and as a column before reset_index - confirm this branch
            # behaves as intended on the pandas version in use.
            idx_date = pd.date_range(df["Date"].min(), df["Date"].max())
            df = (
                df.set_index("Date").groupby("ModelID").apply(
                    lambda d: d.reindex(idx_date))
                # .drop("ModelID", axis=1)
                # .reset_index("ModelID")
                .reset_index().rename(columns={"index": "Date"}))
            df["Date"] = pd.to_datetime(df["Date"]).dt.date
        # Wide layout: one row per model, one column per date.
        df_annot = df.pivot(columns="Date",
                            values="ResponseCount",
                            index="ModelID")
        df_sign = self._create_sign_df(df_annot)
        return (df_annot, df_sign)
def df_to_heatmap_format(df: pd.DataFrame):
    """Reshape a long age-band count frame into the layout used for heatmaps.

    The frame is pivoted on the module-level ``DATE`` / ``AGE_BAND`` /
    ``COUNT`` column names, missing cells are filled with 0, and the result
    is transposed so that each row is an age band and each remaining column
    is one ``DATE`` value.

    Parameters
    ----------
    df : pd.DataFrame
        Long-format frame containing the ``DATE``, ``AGE_BAND`` and ``COUNT``
        columns.

    Returns
    -------
    pd.DataFrame
        Transposed frame whose ``AGE_BAND`` column is of dtype ``"string"``
        and whose other columns (everything after the first) are cast to int.
    """
    # pivot the df
    logging.info("Pivot[ing] age bin df")
    pivoted = df.pivot(index=[DATE], columns=[AGE_BAND], values=[COUNT])
    wide = pivoted[COUNT].reset_index().fillna(0)
    logging.info("Pivot[ed] age bin df")

    logging.info("Transform[ing] df for heatmap")
    transposed = wide.transpose().copy()
    # The first transposed row holds the DATE values: use it to relabel the
    # numbered columns, then drop that row from the data.
    header_row = transposed.iloc[0]
    heatmap = transposed.reset_index().rename(columns=header_row)
    heatmap = heatmap.drop(0, axis=0)
    logging.info("Transform[ed] df for heatmap")

    # convert date values
    logging.info("Convert[ing] date columns")
    for date_col in heatmap.columns.to_list()[1:]:
        heatmap[date_col] = heatmap[date_col].astype(int)
    logging.info("Convert[ed] date columns")

    # convert age bin values to string
    logging.info("Convert[ing] age bin columns to string")
    heatmap[AGE_BAND] = heatmap[AGE_BAND].astype("string")
    logging.info("Convert[ed] age bin columns to string")

    return heatmap
Exemple #32
0
    def pipe_age_pivot(self, df: pd.DataFrame) -> pd.DataFrame:
        """Pivot the age dataset so each (metric, age group) pair is a column.

        Args:
            df: Long-format frame with ``location``, ``date``, ``age_group``
                and the per-hundred metric columns.

        Returns:
            Wide frame with two-level columns, ordered by the concatenation
            of both level labels.

        Raises:
            Exception: If a location/date/age_group combination is repeated.
            ValueError: If the age groups differ between the metrics.
        """
        key_cols = ["date", "location", "age_group"]
        repeated = df[df.duplicated(subset=key_cols)]
        if len(repeated):
            print(repeated)
            raise Exception("There are duplicate combinations of location-date-age_group in the age dataset!")

        wide = df.pivot(index=["location", "date"], columns="age_group").reset_index()

        # Ensure column order
        ordered = sorted(wide.columns, key=lambda pair: pair[0] + pair[1])
        wide = wide[pd.MultiIndex.from_tuples(ordered)]

        # Every metric must expose the same set of age groups.
        vaccinated_groups = wide.people_vaccinated_per_hundred.columns
        fully_groups = wide.people_fully_vaccinated_per_hundred.columns
        booster_groups = wide.people_with_booster_per_hundred.columns
        missing_in_fully = vaccinated_groups.difference(fully_groups)
        missing_in_booster = fully_groups.difference(booster_groups)
        if missing_in_fully.any() or missing_in_booster.any():
            raise ValueError(
                "There is a mismatch between age groups in people vaccinated and people fully vaccinated"
            )
        return wide
Exemple #33
0
def getAdjClosePrices(tickers, startdate, enddate, db="/Users/Felix/assetjet.db"):
    """Load adjusted close prices for *tickers* from the SQLite store.

    Parameters
    ----------
    tickers : iterable of str
        Ticker codes to select (matched against ``TimeSeries.Cd``).
    startdate, enddate
        Inclusive bounds applied to ``TimeSeries.Date``.
    db : str, optional
        Path of the SQLite database file. Defaults to the original
        hard-coded location.

    Returns
    -------
    tuple
        ``(prices, seriesbegin)``: ``prices`` is a DataFrame indexed by date
        with one column per ticker holding ``AdjClose``; ``seriesbegin`` is a
        DataFrame indexed by ticker holding the earliest ``Date`` per ticker.
    """
    # Open DB Connection, TODO: switch to SQLAlchemy
    conn = sqlite3.connect(db, detect_types=sqlite3.PARSE_DECLTYPES)
    try:
        cursor = conn.cursor()
        try:
            # Query: the requested tickers go into a temp table that is joined
            # against the time series, so every value stays parameterized.
            cursor.execute("""CREATE TEMP TABLE Tickers (Cd Text)""")
            cursor.executemany(
                """INSERT INTO Tickers VALUES(?)""",
                ((ticker,) for ticker in tickers),
            )
            cursor.execute(
                """SELECT ts.Cd, Date, AdjClose
                      FROM TimeSeries ts
                      INNER JOIN Tickers t ON ts.Cd = t.Cd
                      WHERE Date >= ? AND Date <= ?""",
                (startdate, enddate),
            )
            rows = cursor.fetchall()
            # zip() is an iterator on Python 3 and cannot be subscripted, so
            # the column names are read from the description tuples directly.
            column_names = [column[0] for column in cursor.description]
        finally:
            cursor.close()
    finally:
        # Close the connection even when a query fails.
        conn.close()

    # Create a pandas DataFrame
    pricesRaw = DataFrame(rows, columns=column_names)
    pricesRaw["Date"] = pd.to_datetime(pricesRaw["Date"])  # convert date to datetime
    seriesbegin = pricesRaw[["Cd", "Date"]].groupby("Cd").min()
    # Pivot DataFrame (pivot arguments are keyword-only since pandas 2.0)
    prices = pricesRaw.pivot(index="Date", columns="Cd", values="AdjClose")
    return prices, seriesbegin
Exemple #34
0
    def pipe_calculate_metrics(self, df: pd.DataFrame) -> pd.DataFrame:
        """Turn per-vaccine, per-dose counts into cumulative daily metrics.

        Every non-date column name is expected to end in a dose number; that
        number is split off so doses 1, 2 and 3 become separate columns per
        date and vaccine.

        Args:
            df: Wide frame with a ``date`` column plus one column per
                vaccine/dose combination.

        Returns:
            One row per date with cumulative integer ``total_vaccinations``,
            ``people_vaccinated``, ``people_fully_vaccinated`` and
            ``total_boosters``, sorted by date.
        """
        metric_cols = ["total_vaccinations", "people_vaccinated", "people_fully_vaccinated", "total_boosters"]

        long_df = df.melt(id_vars="date", var_name="vaccine", value_name="doses")
        long_df["dose_number"] = long_df.vaccine.str.extract(r"(\d+)$").astype(int)
        long_df["vaccine"] = long_df.vaccine.str.replace(r"(\d+)$", "", regex=True)

        wide = long_df.pivot(index=["date", "vaccine"], columns="dose_number", values="doses")
        wide = wide.reset_index().fillna(0)

        # total_vaccinations
        wide["total_vaccinations"] = wide[1] + wide[2] + wide[3]

        # people_vaccinated
        wide["people_vaccinated"] = wide[1]

        # Vaccines in self._vax_2d count dose 2 as "fully vaccinated" and dose 3
        # as a booster; vaccines in self._vax_1d count dose 1 as "fully
        # vaccinated" and doses 2 and 3 as boosters.
        in_vax_2d = wide.vaccine.isin(self._vax_2d)
        in_vax_1d = wide.vaccine.isin(self._vax_1d)

        # people_fully_vaccinated
        wide.loc[in_vax_2d, "people_fully_vaccinated"] = wide[2]
        wide.loc[in_vax_1d, "people_fully_vaccinated"] = wide[1]

        # total_boosters
        wide.loc[in_vax_2d, "total_boosters"] = wide[3]
        wide.loc[in_vax_1d, "total_boosters"] = wide[2] + wide[3]

        daily = wide[["date"] + metric_cols].groupby("date", as_index=False).sum()
        daily = daily.sort_values("date")

        # Daily sums become running totals.
        daily[metric_cols] = daily[metric_cols].cumsum().astype(int)

        return daily
Exemple #35
0
def save_output(df: pd.DataFrame, relevant_mac_addr_map: dict,
                method: str) -> None:
    """Export signal-strength tables as CSV files and annotated heatmaps.

    The long-format input is pivoted to a mac_addr x location table of
    ``signal_str`` and written out three times: every access point, only
    access points without missing values, and only the access points named
    in ``relevant_mac_addr_map`` (renamed through that map and sorted).

    Parameters
    ----------
    df : pd.DataFrame
        Long-format frame with ``mac_addr``, ``location`` and ``signal_str``.
    relevant_mac_addr_map : dict
        Maps the MAC addresses to keep onto the labels used in the output.
    method : str
        Prefix used in output file names and plot titles.
    """

    def export(frame: pd.DataFrame, label: str, title: str) -> None:
        # One CSV plus one heatmap PNG per table; the figure is cleared after
        # saving so the next heatmap starts from an empty canvas.
        stem = f"{method} - {label}"
        frame.to_csv(CSV_OUTPUT_FOLDER / f'{stem}.csv')
        sns.heatmap(frame, annot=True)
        plt.title(f"{method} - {title}")
        plt.savefig(HEATMAP_OUTPUT_FOLDER / f'{stem}.png', bbox_inches="tight")
        plt.clf()

    table = df.pivot(index="mac_addr", columns="location", values="signal_str")
    export(table, "All APs", "RSSI of ALL APs")

    table.dropna(axis=0, inplace=True)
    export(table, "Non-NaN APs", "RSSI of all Non-NaN APs")

    relevant = table[table.index.isin(relevant_mac_addr_map.keys())]
    relevant = relevant.rename(index=relevant_mac_addr_map)
    relevant = relevant.sort_values('mac_addr')
    export(relevant, "Relevant APs", "RSSI of Relevant APs")
Exemple #36
0
def generate_expresson_matrix(
        table_expression: pandas.DataFrame) -> pandas.DataFrame:
    """Return a locusTag x strain matrix of ``log2FoldChange`` values.

    Combinations that are absent from ``table_expression`` end up as NaN.
    """
    pivot_spec = {
        "index": "locusTag",
        "columns": "strain",
        "values": "log2FoldChange",
    }
    return table_expression.pivot(**pivot_spec)
Exemple #37
0
def results_long_to_wide(metrics: pd.DataFrame) -> pd.DataFrame:
    """Adjust metric results from long format to wide.

    Parameters
    ----------
    metrics : pd.DataFrame
        Long-format results with ``exp_id``, ``exp_variant_id``,
        ``metric_name``, ``metric_id``, ``mean``, ``diff``,
        ``confidence_interval`` and ``p_value`` columns. The frame is not
        modified.

    Returns
    -------
    pd.DataFrame
        One row per ``(exp_id, exp_variant_id)`` and two-level columns
        ``(metric_name, statistic)``, ordered by the value that
        ``_enrich_metric_id`` computes for each column.
    """

    # Derive the confidence-interval bounds and upper-case the variant ids on
    # a new frame, so the caller's DataFrame does not silently gain columns.
    metrics = metrics.assign(
        conf_int_lower=metrics["diff"] - metrics["confidence_interval"],
        conf_int_upper=metrics["diff"] + metrics["confidence_interval"],
        exp_variant_id=metrics["exp_variant_id"].str.upper(),
    )

    # Reshape metrics DataFrame - from long to wide
    r = metrics.pivot(
        index=["exp_id", "exp_variant_id"],
        columns=["metric_name", "metric_id"],
        values=["mean", "diff", "conf_int_lower", "conf_int_upper", "p_value"],
    )

    # Add column multiindex names and transpose
    r.columns.names = ["statistic", "metric_name", "metric_id"]
    r = r.transpose()

    # Sort metrics and statistics in the right order
    r.reset_index(inplace=True)
    r["metric_id"] = r.apply(_enrich_metric_id, axis="columns")
    r.sort_values(by="metric_id", inplace=True)
    # After the transpose the columns are still two-level, so the helper
    # column is addressed by its full (name, "") key.
    r.drop(columns=[("metric_id", "")], inplace=True)

    # Set index and transpose back
    r.set_index(["metric_name", "statistic"], inplace=True)
    r = r.transpose()

    return r
Exemple #38
0
def make_data_container(feature_data: pd.DataFrame,
                        cluster: pd.Series,
                        sample_metadata: pd.DataFrame,
                        fill_na: bool = True) -> DataContainer:
    """
    Organizes the detected and matched features into a DataContainer.

    Parameters
    ----------
    feature_data: DataFrame
        DataFrame obtained from detect_features function. A "cluster" column
        is written into this frame.
    cluster: pd.Series
        Series obtained from feature_correspondence function.
    sample_metadata: DataFrame
        DataFrame with information from each analyzed sample. The index must
        be the sample names used in feature_data. A column named "class", with
        the class name of each sample is required. For further data processing
        run order information in a column named "order" and analytical batch
        information in a column named "batch" are recommended.
    fill_na: bool, True
        If True fill missing values in the data matrix with zeros.

    Returns
    -------
    DataContainer
    """

    # remove noise
    feature_data["cluster"] = cluster
    not_noise = cluster != "noise"
    feature_data = feature_data[not_noise]

    # compute aggregate statistics for each feature -> feature metadata
    estimators = {
        "mz": ["mean", "std", "min", "max"],
        "rt": ["mean", "std", "min", "max"]
    }
    feature_metadata = feature_data.groupby("cluster").agg(estimators)
    feature_metadata.columns = _flatten_column_multindex(feature_metadata)
    feature_metadata.index.name = "feature"

    # make data matrix
    data_matrix = feature_data.pivot(index="sample",
                                     columns="cluster",
                                     values="area")
    data_matrix.columns.name = "feature"
    if fill_na:
        data_matrix = data_matrix.fillna(0)

    # Add samples without any detected feature as rows. These rows are filled
    # with 0 regardless of ``fill_na``.
    missing_index = sample_metadata.index.difference(data_matrix.index)
    # TODO: manage data inputting
    missing = pd.DataFrame(data=0,
                           index=missing_index,
                           columns=data_matrix.columns)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    data_matrix = pd.concat([data_matrix, missing])
    # Put the rows in the same order as the sample metadata.
    data_matrix = data_matrix.loc[sample_metadata.index, :]

    dc = DataContainer(data_matrix, feature_metadata, sample_metadata)
    return dc
Exemple #39
0
def from_pico_stream(df: pd.DataFrame) -> pd.DataFrame:
    """Packs a channel-by-channel data-stream into a PicoLog PLW Player data
    dataframe, where each row has temperature measurements across
    all PicoLogger acquisition channels.

    For an input data-stream of length num_samples x num_channels, the output
    dataframe will have shape (num_samples, num_channels).

    The input dataframe is left unmodified.

    Args:
        df: PicoLog PLW Player data-stream, where each row has a
        temperature measurement from a single PicoLogger acquisition channel.
        Must provide `channel` and `temp` columns.

    Returns:
        Equivalent packed-dataframe, where each row has temperature
        measurements across all PicoLogger acquisition channels.

        index:      None (enumeration of entries)
        columns:    `<channel_name>`, ... x num_channels
    """

    # Reindex timestamps with one timestamp per block of channels. The new
    # index is attached to a fresh frame instead of overwriting ``df.index``,
    # which would silently change the caller's dataframe.
    num_channels = len(df['channel'].unique())
    blocks = df.set_index(df.index // num_channels)

    # Pivot table
    return blocks.pivot(columns='channel', values='temp')
Exemple #40
0
def img_seaborn(groupusersdict, labels,  values1, values2, values3):
    """Render a one-row heatmap of per-user flags and save it as seaborn.png.

    ``groupusersdict`` is updated in place: every key that appears in the
    filtered ``labels`` (after stripping newlines) is set to 1. Nothing is
    drawn when filtering leaves no labels.
    """
    labels, values1, values2, values3 = filterzerodata4Three(labels, values1, values2, values3)
    if not labels:
        return

    # Flag every known user that shows up in the filtered labels.
    for raw_label in labels:
        user = raw_label.strip('\n')
        if user in groupusersdict:
            groupusersdict[user] = 1

    names = list(groupusersdict)
    commits = [groupusersdict[name] for name in names]
    # All users share period 0, so the pivot below yields a single row.
    periods = [0] * len(names)

    fig, ax = plt.subplots(figsize=(14, 2))
    frame = DataFrame({'姓名': names, '提交': commits, '区间': periods})
    result = frame.pivot(index='区间', columns='姓名', values='提交')

    ax = sns.heatmap(result, annot=True, fmt="g", cmap="Greens")
    ax.set_title("近7天组内提交情况0代表无提交,1代表有提交,3代表特殊情况")
    plt.rcParams['font.sans-serif'] = ['SimHei']  # needed to render Chinese labels correctly
    plt.savefig(get_resultpath() + '/seaborn.png')
    plt.close()
Exemple #41
0
def getAdjClosePrices(tickers, startdate, enddate, db='/Users/Felix/assetjet.db'):
    """Load adjusted close prices for *tickers* from the SQLite store.

    Parameters
    ----------
    tickers : iterable of str
        Ticker codes to select (matched against ``TimeSeries.Cd``).
    startdate, enddate
        Inclusive bounds applied to ``TimeSeries.Date``.
    db : str, optional
        Path of the SQLite database file. Defaults to the original
        hard-coded location.

    Returns
    -------
    tuple
        ``(prices, seriesbegin)``: ``prices`` is a DataFrame indexed by date
        with one column per ticker holding ``AdjClose``; ``seriesbegin`` is a
        DataFrame indexed by ticker holding the earliest ``Date`` per ticker.
    """
    # Open DB Connection, TODO: switch to SQLAlchemy
    conn = sqlite3.connect(db, detect_types=sqlite3.PARSE_DECLTYPES)
    try:
        cursor = conn.cursor()
        try:
            # Query: the requested tickers go into a temp table that is joined
            # against the time series, so every value stays parameterized.
            cursor.execute("""CREATE TEMP TABLE Tickers (Cd Text)""")
            cursor.executemany(
                """INSERT INTO Tickers VALUES(?)""",
                ((ticker,) for ticker in tickers))
            cursor.execute(
                """SELECT ts.Cd, Date, AdjClose
                      FROM TimeSeries ts
                      INNER JOIN Tickers t ON ts.Cd = t.Cd
                      WHERE Date >= ? AND Date <= ?""", (startdate, enddate))
            rows = cursor.fetchall()
            # zip() is an iterator on Python 3 and cannot be subscripted, so
            # the column names are read from the description tuples directly.
            column_names = [column[0] for column in cursor.description]
        finally:
            cursor.close()
    finally:
        # Close the connection even when a query fails.
        conn.close()

    # Create a pandas DataFrame
    pricesRaw = DataFrame(rows, columns=column_names)
    pricesRaw['Date'] = pd.to_datetime(pricesRaw['Date'])  # convert date to datetime
    seriesbegin = pricesRaw[['Cd', 'Date']].groupby('Cd').min()
    # Pivot DataFrame (pivot arguments are keyword-only since pandas 2.0)
    prices = pricesRaw.pivot(index='Date', columns='Cd', values='AdjClose')
    return prices, seriesbegin
Exemple #42
0
 def pipe_pivot(self, df: pd.DataFrame) -> pd.DataFrame:
     """Pivot ``df`` to one row per location/date when pivoting is configured.

     The frame is returned untouched unless both ``self.pivot_column`` and
     ``self.pivot_values`` are set.
     """
     if self.pivot_column is None or self.pivot_values is None:
         return df
     pivoted = df.pivot(
         index=[self.location, self.date],
         columns=self.pivot_column,
         values=self.pivot_values,
     )
     return pivoted.reset_index()
Exemple #43
0
def plot_gat(data, plot_title, file_name):
    """Plot mean ``withinOpt`` per action duration, one line per algorithm.

    ``data`` is modified in place: ``algorithmName`` is overwritten with
    ``algorithmLabel`` and ``actionDuration`` is divided by 1,000,000. The
    error bars come from a z confidence interval of the mean, and the figure
    is written to ``../results/plots/<file_name>.pdf``.
    """
    print(f'Data to plot: {data}')
    data.algorithmName = data.algorithmLabel
    summary = DataFrame(
        columns=["actionDuration", "withinOpt", "algorithmName", "lbound", "rbound"])

    # rescale action durations to ms
    data['actionDuration'] = data['actionDuration'] / 1000000

    # Change data structure such that goal achievement time is averaged,
    # grouped by action duration and algorithm
    grouped = data.groupby(['algorithmName', 'actionDuration'])
    for group_key, duration_group in grouped:
        raw_name = group_key[0]
        display_name = alg_map[raw_name] if raw_name in alg_map else raw_name

        # Get mean of within optimal calculation, add row to results dataframe
        within_opt = duration_group['withinOpt']
        mean_within_opt = within_opt.mean()
        interval = sms.DescrStatsW(list(within_opt)).zconfint_mean()
        row = [
            group_key[1],
            mean_within_opt,
            display_name,
            abs(mean_within_opt - interval[0]),
            abs(mean_within_opt - interval[1]),
        ]
        summary = add_row(summary, row)

    # One [lower, upper] pair of error arrays per algorithm.
    errors = [
        [alg_group['lbound'].values, alg_group['rbound'].values]
        for _, alg_group in summary.groupby('algorithmName')
    ]

    pivot = summary.pivot(index="actionDuration", columns="algorithmName",
                          values="withinOpt")
    pivot = pivot[~pivot.index.duplicated(keep='first')]

    # Below is palette of distinguishable colors for analyzing large sets of algorithms together
    # colors = ["#90C3D4", "#C390D4", "#D4A190", "#A1D490", "#AB3299", "#AB8132", "#32AB44","#325DAB","#9BAB32", "#32AB7E","#4232AB","#AB325F","#495E49","#49545E","#5E495E", "#5E5449","#FA7887","#C8FA78","#78FAEB","#AA78FA"]
    palette = sns.color_palette(n_colors=10)
    plot = pivot.plot(color=palette, title=plot_title, legend=True,
                      yerr=errors, ecolor='black', elinewidth=1,
                      capsize=4, capthick=1)

    # plot.set_xscale('log')
    # plot.set_yscale('log')

    # plot.set_xticks([50, 100, 150, 250, 500, 1000, 2000, 3200])
    # plot.set_yticks([1, 1.1, 1.5, 2])
    # plot.set_ylim([1, 1.4])
    for axis in (plot.get_xaxis(), plot.get_yaxis()):
        axis.set_major_formatter(mpl.ticker.ScalarFormatter())

    plot.set_xlabel('Planning Time per Iteration (milliseconds)')
    plot.set_ylabel('Goal Achievement Time (Factor of Optimal)')
    plot.legend(title="")

    pdf = PdfPages(f"../results/plots/{file_name}.pdf")
    plt.savefig(pdf, format='pdf')
    pdf.close()
Exemple #44
0
    def play_ai(self, rounds=100, print_stats=False):
        """Let the two players play against each other for ``rounds`` games.

        In every game the players alternate moves, player 1 first. After each
        move the encoded board is appended to the mover's recorded states.
        Once ``self.winner()`` returns a non-None value the result is recorded,
        rewards are given and both players and the board are reset. After all
        rounds both players save their state values.

        Parameters
        ----------
        rounds : int
            Number of games to play.
        print_stats : bool
            If True, print the normalized result counts and plot the
            cumulative count of each result over the games played.
        """
        winning_history = []
        for i in range(rounds):
            # progress message every 100 rounds
            if i % 100 == 0:
                print("Rounds {}".format(i))
            # NOTE(review): presumably reset_board() clears self.gameover so the
            # next round can start; not visible here, confirm.
            while not self.gameover:
                positions = self.potential_moves()
                player1_action = self.player1.pick_next_move(
                    positions, self.board, self.player_mark)
                self.make_move(player1_action)
                board_hash = self.encode_to_key()
                self.player1.append_board_state(board_hash)

                win = self.winner()
                if win is not None:
                    # self.print_board()
                    winning_history.append(win)
                    self.giveReward()
                    self.player1.reset_player()
                    self.player2.reset_player()
                    self.reset_board()
                    break

                else:
                    # no result yet: player 2 answers on the updated board
                    positions = self.potential_moves()
                    player2_action = self.player2.pick_next_move(
                        positions, self.board, self.player_mark)
                    self.make_move(player2_action)
                    board_hash = self.encode_to_key()
                    self.player2.append_board_state(board_hash)

                    win = self.winner()
                    if win is not None:
                        winning_history.append(win)
                        # ended with player2 either win or draw
                        self.giveReward()
                        self.player1.reset_player()
                        self.player2.reset_player()
                        self.reset_board()
                        break

        self.player1.save_state_values()
        self.player2.save_state_values()

        if print_stats:
            winning_series = Series(winning_history)
            print(winning_series.value_counts(normalize=True, ascending=False))
            # One column per distinct result with a 1 in the games that ended
            # that way; the cumulative sum gives a running tally per result.
            winning_df = DataFrame({
                'wins': winning_series,
                'values': np.ones(len(winning_series))
            })
            df_to_plot = winning_df.pivot(columns='wins',
                                          values='values').fillna(0).cumsum()
            for c in df_to_plot.columns.values:
                plt.plot(df_to_plot[c], label=f'Winner:{c}')
            plt.legend()
            plt.show()
Exemple #45
0
def tiempos_de_respuesta(datos):
    """Plot the response time per request, one line per pid, and save it.

    ``datos`` holds rows of (pid, numero_request, tiempo_respuesta,
    status_code). The chart is written to ``tiemposDeRespuesta.png``.
    """
    column_names = ['pid', 'numero_request', 'tiempo_respuesta', 'status_code']
    registros = DataFrame(datos, columns=column_names)
    por_pid = registros.pivot(index='numero_request',
                              columns='pid',
                              values='tiempo_respuesta')
    por_pid.plot()
    plt.savefig("tiemposDeRespuesta.png")
Exemple #46
0
    def test_pivot_with_tz(self):
        # GH 5878
        df = DataFrame(
            {
                "dt1": [
                    datetime(2013, 1, 1, 9, 0),
                    datetime(2013, 1, 2, 9, 0),
                    datetime(2013, 1, 1, 9, 0),
                    datetime(2013, 1, 2, 9, 0),
                ],
                "dt2": [
                    datetime(2014, 1, 1, 9, 0),
                    datetime(2014, 1, 1, 9, 0),
                    datetime(2014, 1, 2, 9, 0),
                    datetime(2014, 1, 2, 9, 0),
                ],
                "data1": np.arange(4, dtype="int64"),
                "data2": np.arange(4, dtype="int64"),
            }
        )

        df["dt1"] = df["dt1"].apply(lambda d: pd.Timestamp(d, tz="US/Pacific"))
        df["dt2"] = df["dt2"].apply(lambda d: pd.Timestamp(d, tz="Asia/Tokyo"))

        exp_col1 = Index(["data1", "data1", "data2", "data2"])
        exp_col2 = pd.DatetimeIndex(["2014/01/01 09:00", "2014/01/02 09:00"] * 2, name="dt2", tz="Asia/Tokyo")
        exp_col = pd.MultiIndex.from_arrays([exp_col1, exp_col2])
        expected = DataFrame(
            [[0, 2, 0, 2], [1, 3, 1, 3]],
            index=pd.DatetimeIndex(["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"),
            columns=exp_col,
        )

        pv = df.pivot(index="dt1", columns="dt2")
        tm.assert_frame_equal(pv, expected)

        expected = DataFrame(
            [[0, 2], [1, 3]],
            index=pd.DatetimeIndex(["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"),
            columns=pd.DatetimeIndex(["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo"),
        )

        pv = df.pivot(index="dt1", columns="dt2", values="data1")
        tm.assert_frame_equal(pv, expected)
    def test_pivot_with_tz(self):
        # GH 5878
        df = DataFrame({'dt1': [datetime(2013, 1, 1, 9, 0),
                                datetime(2013, 1, 2, 9, 0),
                                datetime(2013, 1, 1, 9, 0),
                                datetime(2013, 1, 2, 9, 0)],
                        'dt2': [datetime(2014, 1, 1, 9, 0),
                                datetime(2014, 1, 1, 9, 0),
                                datetime(2014, 1, 2, 9, 0),
                                datetime(2014, 1, 2, 9, 0)],
                        'data1': np.arange(4, dtype='int64'),
                        'data2': np.arange(4, dtype='int64')})

        df['dt1'] = df['dt1'].apply(lambda d: pd.Timestamp(d, tz='US/Pacific'))
        df['dt2'] = df['dt2'].apply(lambda d: pd.Timestamp(d, tz='Asia/Tokyo'))

        exp_col1 = Index(['data1', 'data1', 'data2', 'data2'])
        exp_col2 = pd.DatetimeIndex(['2014/01/01 09:00',
                                     '2014/01/02 09:00'] * 2,
                                    name='dt2', tz='Asia/Tokyo')
        exp_col = pd.MultiIndex.from_arrays([exp_col1, exp_col2])
        expected = DataFrame([[0, 2, 0, 2], [1, 3, 1, 3]],
                             index=pd.DatetimeIndex(['2013/01/01 09:00',
                                                     '2013/01/02 09:00'],
                                                    name='dt1',
                                                    tz='US/Pacific'),
                             columns=exp_col)

        pv = df.pivot(index='dt1', columns='dt2')
        tm.assert_frame_equal(pv, expected)

        expected = DataFrame([[0, 2], [1, 3]],
                             index=pd.DatetimeIndex(['2013/01/01 09:00',
                                                     '2013/01/02 09:00'],
                                                    name='dt1',
                                                    tz='US/Pacific'),
                             columns=pd.DatetimeIndex(['2014/01/01 09:00',
                                                       '2014/01/02 09:00'],
                                                      name='dt2',
                                                      tz='Asia/Tokyo'))

        pv = df.pivot(index='dt1', columns='dt2', values='data1')
        tm.assert_frame_equal(pv, expected)
Exemple #48
0
    def test_pivot_periods(self):
        df = DataFrame(
            {
                "p1": [
                    pd.Period("2013-01-01", "D"),
                    pd.Period("2013-01-02", "D"),
                    pd.Period("2013-01-01", "D"),
                    pd.Period("2013-01-02", "D"),
                ],
                "p2": [
                    pd.Period("2013-01", "M"),
                    pd.Period("2013-01", "M"),
                    pd.Period("2013-02", "M"),
                    pd.Period("2013-02", "M"),
                ],
                "data1": np.arange(4, dtype="int64"),
                "data2": np.arange(4, dtype="int64"),
            }
        )

        exp_col1 = Index(["data1", "data1", "data2", "data2"])
        exp_col2 = pd.PeriodIndex(["2013-01", "2013-02"] * 2, name="p2", freq="M")
        exp_col = pd.MultiIndex.from_arrays([exp_col1, exp_col2])
        expected = DataFrame(
            [[0, 2, 0, 2], [1, 3, 1, 3]],
            index=pd.PeriodIndex(["2013-01-01", "2013-01-02"], name="p1", freq="D"),
            columns=exp_col,
        )

        pv = df.pivot(index="p1", columns="p2")
        tm.assert_frame_equal(pv, expected)

        expected = DataFrame(
            [[0, 2], [1, 3]],
            index=pd.PeriodIndex(["2013-01-01", "2013-01-02"], name="p1", freq="D"),
            columns=pd.PeriodIndex(["2013-01", "2013-02"], name="p2", freq="M"),
        )

        pv = df.pivot(index="p1", columns="p2", values="data1")
        tm.assert_frame_equal(pv, expected)
Exemple #49
0
def GetPricngData():
    """Return average competitor prices per category and store as JSON.

    Runs an aggregate query against the ``pricing`` table, pivots the rows so
    that each ``Top Level Category`` becomes a row and each ``Store`` a
    column holding ``CompetitorsPrice``, and replaces missing
    category/store combinations with 0.

    Returns:
        str: a JSON object with the keys ``index`` (row labels), ``columns``
        (column labels) and ``values`` (row-major matrix).  If any step
        fails, the text of the raised exception is returned instead.
    """
    query = "SELECT DISTINCT a.Store AS Store,a.`Top Level Category`,ROUND(AVG(a.Price),2) as CompetitorsPrice, ROUND(b.MyStorePrice,2) as MyStorePrice FROM pricing  a JOIN (SELECT DISTINCT `Top Level Category` AS category,AVG(Price) AS MyStorePrice FROM pricing WHERE Store = 'My Store' GROUP BY Store,`Top Level Category`)  b ON a.`Top Level Category` = b.category GROUP BY a.Store,a.`Top Level Category`"
    try:
        rows = list(ExecuteQuery(query))
        frame = DataFrame(rows)
        pivoted = frame.pivot(
            index="Top Level Category", columns="Store", values="CompetitorsPrice"
        )
        # fillna also copes with object-dtype columns, where np.isnan raises.
        pivoted = pivoted.fillna(0)
        pricingResult = {
            "index": pivoted.index.tolist(),
            "columns": pivoted.columns.tolist(),
            "values": pivoted.values.tolist(),
        }
        return json.dumps(pricingResult)
    except Exception as e:
        # `e.message` only exists on Python 2; on Python 3 it raised
        # AttributeError from inside the handler.  str(e) works on both.
        return str(e)
 def test_pivot_empty(self):
     df = DataFrame({}, columns=["a", "b", "c"])
     result = df.pivot("a", "b", "c")
     expected = DataFrame({})
     assert_frame_equal(result, expected, check_names=False)
         ,['mercredi','temperature',28]
         ,['mercredi','ensoleillement',4]
         ,['mercredi','pollution',5]
         ,['mercredi','pluie',100]
         ,['jeudi','temperature',28]
         ,['jeudi','ensoleillement',4]
         ,['jeudi','pollution',5]
         ,['jeudi','pluie',100]
         ,['vendredi','temperature',28]
         ,['vendredi','ensoleillement',4]
         ,['vendredi','pollution',5]
         ,['vendredi','pluie',100]
         ]

# Reshape the long (jour, attribute, value) records into one row per 'jour'
# and one column per 'attribute'.  Keyword arguments are required by
# pandas >= 2.0 and accepted by older releases.
cities_data = DataFrame(releves, columns=['jour','attribute','value'])
cities_data.pivot(index='jour', columns='attribute', values='value')


aliments = pd.read_csv('aliments.csv', sep='\t')

# Keep only the rows whose 'traces' field is present.  `.ix` was removed in
# pandas 1.0; `.loc` is the label-based equivalent used here.
aliments_with_traces = aliments.loc[aliments.traces.dropna().index]
traces_iter = (set(x.split(',')) for x in aliments_with_traces['traces'])
# set().union(...) also works when no row has traces (set.union(*[]) raises).
traces = set().union(*traces_iter)
# One indicator column per distinct trace.  Recent pandas rejects a set as
# `columns`, so pass a sorted list, which also makes the order deterministic.
dummies = DataFrame(np.zeros((len(aliments_with_traces), len(traces))), columns=sorted(traces))

# `dummies` has a default 0..n-1 index, so the enumerate position is its label.
for i, tr in enumerate(aliments_with_traces.traces):
    dummies.loc[i, tr.split(',')] = 1


# Count rows per bin: qcut makes 5 quantile-based bins, cut makes 5 equal-width
# bins.  The top-level pd.value_counts() is deprecated; use the Series method.
pd.qcut(aliments[u'energy_100g'].dropna(),5).value_counts()
pd.cut(aliments[u'energy_100g'].dropna(),5).value_counts()
# Join `sales` and `bonus` on their shared 'ID' column.
# NOTE(review): originally labelled a many-to-many join; that holds only if
# 'ID' values repeat in both frames -- confirm against the data.
mergeM = pd.merge(sales, bonus, on = 'ID')
# Vertical stacking: append the rows of `sales` below `employee` and renumber
# the result index instead of keeping the original labels.
stack = pd.concat([employee, sales], ignore_index = True)

#############################################################################################################
# 8. Reshaping & Pivoting
#############################################################################################################
df1 = DataFrame([['Big','LAX',3,np.nan],['Big','SFO',6,7],['Med','SEA-TAC',9,np.nan],['Small','POR',np.nan,np.nan]],
                 index=pd.Index(['LA', 'SF', 'SEA', 'POR']),
                 columns=pd.Index(['Type', 'Airport', 'Cool Factor','D']))

# .stack() moves the column labels into the rows as the inner (child) level of a hierarchical index;
# .unstack() on a single-level index puts the column labels on the outer (parent) level instead.
df2 = df1.stack(dropna = False)                    # dropna=False keeps the NaN entries
df3 = df1.unstack()                                # column labels become the parent index level

# .pivot(index=, columns=, values=) reshapes long data to wide: one row per 'Airport', one column per 'Type'.
# Keyword arguments are required by pandas >= 2.0 and accepted by older releases.
df4 = df1.pivot(index='Airport', columns='Type', values='Cool Factor')

#############################################################################################################
# 9. Outlier Analysis
#############################################################################################################
np.random.seed(12345)                                # fixed seed so the sample is reproducible
df = DataFrame(np.random.randn(1000,4))
df.describe()                                        # summary statistics; |value| > 3 is treated as an outlier

df[0][np.abs(df[0])>3]                               # values of column 0 whose magnitude exceeds 3
# `axis` must be passed by keyword on pandas >= 2.0 (positional .any(1) is rejected there).
df[(np.abs(df)>3).any(axis=1)]                       # rows containing at least one value with magnitude > 3
df[np.abs(df)>3] = np.sign(df) * 3                   # cap every outlier at +/-3, keeping its sign

#############################################################################################################
# 10. Binning Data
#############################################################################################################
Exemple #53
0
 def test_pivot_duplicates(self):
     """pivot() must reject (index, columns) pairs that occur more than once."""
     # ('foo', 'one') appears twice, so the reshape is ambiguous.
     data = DataFrame({'a': ['bar', 'bar', 'foo', 'foo', 'foo'],
                       'b': ['one', 'two', 'one', 'one', 'two'],
                       'c': [1., 2., 3., 3., 4.]})
     with pytest.raises(ValueError, match='duplicate entries'):
         # Keyword arguments: positional use is rejected by pandas >= 2.0.
         data.pivot(index='a', columns='b', values='c')
# Fill values keyed by column label: "one" -> -1, "two" -> -2.
replacements = dict(one=-1, two=-2)
df.fillna(replacements)

df = DataFrame(array([[1, 3], [1, 2], [3, 2], [2, 1]]), columns=["one", "two"])
# DataFrame.sort() was removed in pandas 0.20; sort_values(by=...) replaces it.
df.sort_values(by="one")
df.sort_values(by=["one", "two"])
df.sort_values(by=["one", "two"], ascending=[False, True])

# Long-format price table: one row per (ticker, price), with the three dates
# repeated once for each of the two tickers.
prices = [101.0, 102.0, 103.0]
tickers = ["GOOG", "AAPL"]
data = list(itertools.product(tickers, prices))
dates = pandas.date_range("2013-01-03", periods=3)
df = DataFrame(data, columns=["ticker", "price"])
df["dates"] = dates.append(dates)
df
# Long -> wide: one row per date, one column per ticker.
df.pivot(index="dates", columns="ticker", values="price")

original = DataFrame([[1, 1], [2, 2], [3.0, 3]], index=["a", "b", "c"], columns=["one", "two"])
original.reindex(index=["b", "c", "d"])
different = DataFrame([[1, 1], [2, 2], [3.0, 3]], index=["c", "d", "e"], columns=["one", "two"])
original.reindex_like(different)
# reindex_axis() was removed in pandas 1.0; reindex(columns=...) is equivalent.
original.reindex(columns=["two", "one"])

left = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["one", "two"])
right = DataFrame([[1, 2], [3, 4], [7, 8]], columns=["one", "three"])
left.merge(right, on="one")  # Same as how='inner'
left.merge(right, on="one", how="left")
left.merge(right, on="one", how="right")
left.merge(right, on="one", how="outer")

left = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["one", "two"])
Exemple #55
0
    def test_pivot_integer_bug(self):
        df = DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")])

        result = df.pivot(index=1, columns=0, values=2)
        repr(result)
        tm.assert_index_equal(result.columns, Index(['A', 'B'], name=0))
Exemple #56
0
 def test_pivot_empty(self):
     df = DataFrame({}, columns=['a', 'b', 'c'])
     result = df.pivot('a', 'b', 'c')
     expected = DataFrame({})
     tm.assert_frame_equal(result, expected, check_names=False)
Exemple #57
0
 def test_pivot_duplicates(self):
     data = DataFrame({'a': ['bar', 'bar', 'foo', 'foo', 'foo'],
                       'b': ['one', 'two', 'one', 'one', 'two'],
                       'c': [1., 2., 3., 3., 4.]})
     with tm.assert_raises_regex(ValueError, 'duplicate entries'):
         data.pivot('a', 'b', 'c')
Exemple #58
0
    def test_pivot_integer_bug(self):
        """pivot() accepts integer column labels; result columns are 'A' and 'B'."""
        rows = [("A", "1", "A1"), ("B", "2", "B2")]
        frame = DataFrame(data=rows)

        pivoted = frame.pivot(index=1, columns=0, values=2)
        repr(pivoted)  # rendering the pivoted frame must not raise
        expected_labels = ['A', 'B']
        self.assert_numpy_array_equal(pivoted.columns, expected_labels)
Exemple #59
0
# Fill values keyed by column label: 'one' -> -1, 'two' -> -2.
replacements = dict(one=-1, two=-2)
df.fillna(replacements)

df = DataFrame(array([[1, 3], [1, 2], [3, 2], [2, 1]]), columns=['one', 'two'])
# DataFrame.sort() was removed in pandas 0.20; sort_values(by=...) replaces it.
df.sort_values(by='one')
df.sort_values(by=['one', 'two'])
df.sort_values(by=['one', 'two'], ascending=[False, True])

# Long-format price table: one row per (ticker, price), with the three dates
# repeated once for each of the two tickers.
prices = [101.0, 102.0, 103.0]
tickers = ['GOOG', 'AAPL']
data = list(itertools.product(tickers, prices))
dates = pandas.date_range('2013-01-03', periods=3)
df = DataFrame(data, columns=['ticker', 'price'])
df['dates'] = dates.append(dates)
df
# Long -> wide: one row per date, one column per ticker.
df.pivot(index='dates', columns='ticker', values='price')

original = DataFrame([[1, 1], [2, 2], [3.0, 3]], index=['a', 'b', 'c'], columns=['one', 'two'])
original.reindex(index=['b', 'c', 'd'])
different = DataFrame([[1, 1], [2, 2], [3.0, 3]], index=['c', 'd', 'e'], columns=['one', 'two'])
original.reindex_like(different)
# reindex_axis() was removed in pandas 1.0; reindex(columns=...) is equivalent.
original.reindex(columns=['two', 'one'])

left = DataFrame([[1, 2], [3, 4], [5, 6]], columns=['one', 'two'])
right = DataFrame([[1, 2], [3, 4], [7, 8]], columns=['one', 'three'])
left.merge(right, on='one')  # Same as how='inner'
left.merge(right, on='one', how='left')
left.merge(right, on='one', how='right')
left.merge(right, on='one', how='outer')

left = DataFrame([[1, 2], [3, 4], [5, 6]], columns=['one', 'two'])
s2 = Series([4, 5, 6], index=['c', 'd', 'e'])
# Concatenate under the keys 'one' and 'two', which label each input's rows.
data2 = pd.concat([s1, s2], keys=['one', 'two'])

# print data2
# print data2.unstack()
# print data2.unstack().stack()
# print data2.unstack().stack(dropna=False)

df = DataFrame({'left': result, 'right': result+5}, columns=pd.Index(['left', 'right'], name='side'))
# print df
# print df.unstack('state')
# print df.unstack('state').stack('side')

# The context manager closes the workbook handle once the sheet is parsed;
# the original left it open.
with pd.ExcelFile('C:\\Users\\Administrator\\Desktop\\data.xlsx') as xls_file:
    ldata = DataFrame(xls_file.parse('Sheet1'))
# Long -> wide: one row per 'date', one column per 'item'.  Keyword arguments
# are required by pandas >= 2.0 and accepted by older releases.
pivoted = ldata.pivot(index='date', columns='item', values='value')
# print ldata[:10]
# print pivoted.head()

ldata["value2"] = np.random.randn(len(ldata))
# print ldata[:10]

# Without `values`, every remaining column is pivoted and the result gets
# two-level columns (value column, item).
pivoted = ldata.pivot(index='date', columns='item')
# print pivoted[:5]
# print pivoted['value'][:3]

# Same reshape expressed as set_index + unstack.
unstacked = ldata.set_index(['date', 'item']).unstack('item')
# print unstacked[:7]

data = DataFrame({'k1': ['one'] * 3 + ['two'] * 4, 'k2': [1, 1, 2, 3, 3, 4, 4]})
# print data