Example #1
0
    def test_pivot_periods(self):
        df = DataFrame({'p1': [pd.Period('2013-01-01', 'D'),
                               pd.Period('2013-01-02', 'D'),
                               pd.Period('2013-01-01', 'D'),
                               pd.Period('2013-01-02', 'D')],
                        'p2': [pd.Period('2013-01', 'M'),
                               pd.Period('2013-01', 'M'),
                               pd.Period('2013-02', 'M'),
                               pd.Period('2013-02', 'M')],
                        'data1': np.arange(4, dtype='int64'),
                        'data2': np.arange(4, dtype='int64')})

        exp_col1 = Index(['data1', 'data1', 'data2', 'data2'])
        exp_col2 = pd.PeriodIndex(['2013-01', '2013-02'] * 2,
                                  name='p2', freq='M')
        exp_col = pd.MultiIndex.from_arrays([exp_col1, exp_col2])
        expected = DataFrame([[0, 2, 0, 2], [1, 3, 1, 3]],
                             index=pd.PeriodIndex(['2013-01-01', '2013-01-02'],
                                                  name='p1', freq='D'),
                             columns=exp_col)

        pv = df.pivot(index='p1', columns='p2')
        tm.assert_frame_equal(pv, expected)

        expected = DataFrame([[0, 2], [1, 3]],
                             index=pd.PeriodIndex(['2013-01-01', '2013-01-02'],
                                                  name='p1', freq='D'),
                             columns=pd.PeriodIndex(['2013-01', '2013-02'],
                                                    name='p2', freq='M'))

        pv = df.pivot(index='p1', columns='p2', values='data1')
        tm.assert_frame_equal(pv, expected)
Example #2
0
    def test_pivot_index_with_nan(self):
        """NaN / NaT labels must be kept when they land on a pivoted axis."""
        # GH 3588
        nan = np.nan
        frame = DataFrame({'a': ['R1', 'R2', nan, 'R4'],
                           'b': ['C1', 'C2', 'C3', 'C4'],
                           'c': [10, 15, 17, 20]})
        wide = frame.pivot('a', 'b', 'c')

        row_labels = Index([nan, 'R1', 'R2', 'R4'], name='a')
        col_labels = Index(['C1', 'C2', 'C3', 'C4'], name='b')
        expected = DataFrame([[nan, nan, 17, nan],
                              [10, nan, nan, nan],
                              [nan, 15, nan, nan],
                              [nan, nan, nan, 20]],
                             index=row_labels, columns=col_labels)
        tm.assert_frame_equal(wide, expected)
        # Swapping index and columns must give the transpose.
        tm.assert_frame_equal(frame.pivot('b', 'a', 'c'), expected.T)

        # GH9491
        stamps = pd.date_range('2014-02-01', periods=6, freq='D')
        frame = DataFrame({'a': stamps, 'c': 100 + np.arange(6)})
        frame['b'] = frame['a'] - pd.Timestamp('2014-02-02')
        # Blank out a few datetime and timedelta labels.
        frame.loc[1, 'a'] = nan
        frame.loc[3, 'a'] = nan
        frame.loc[1, 'b'] = nan
        frame.loc[4, 'b'] = nan

        pivoted = frame.pivot('a', 'b', 'c')
        # Every source row must end up in exactly one non-null cell.
        self.assertEqual(pivoted.notnull().values.sum(), len(frame))

        for _, record in frame.iterrows():
            self.assertEqual(pivoted.loc[record['a'], record['b']],
                             record['c'])

        tm.assert_frame_equal(frame.pivot('b', 'a', 'c'), pivoted.T)
Example #3
0
    def test_pivot(self):
        data = {
            'index': ['A', 'B', 'C', 'C', 'B', 'A'],
            'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
            'values': [1., 2., 3., 3., 2., 1.]
        }

        frame = DataFrame(data)
        pivoted = frame.pivot(
            index='index', columns='columns', values='values')

        expected = DataFrame({
            'One': {'A': 1., 'B': 2., 'C': 3.},
            'Two': {'A': 1., 'B': 2., 'C': 3.}
        })

        expected.index.name, expected.columns.name = 'index', 'columns'
        tm.assert_frame_equal(pivoted, expected)

        # name tracking
        assert pivoted.index.name == 'index'
        assert pivoted.columns.name == 'columns'

        # don't specify values
        pivoted = frame.pivot(index='index', columns='columns')
        assert pivoted.index.name == 'index'
        assert pivoted.columns.names == (None, 'columns')
Example #4
0
    def test_pivot(self):
        """Check wide pivoting, axis-name tracking and a multi-column pivot."""
        labels = ["A", "B", "C", "C", "B", "A"]
        groups = ["One", "One", "One", "Two", "Two", "Two"]
        numbers = [1.0, 2.0, 3.0, 3.0, 2.0, 1.0]
        frame = DataFrame({"index": labels, "columns": groups, "values": numbers})

        expected = DataFrame(
            {
                "One": {"A": 1.0, "B": 2.0, "C": 3.0},
                "Two": {"A": 1.0, "B": 2.0, "C": 3.0},
            }
        )
        expected.index.name = "index"
        expected.columns.name = "columns"

        pivoted = frame.pivot(index="index", columns="columns", values="values")
        assert_frame_equal(pivoted, expected)

        # name tracking
        self.assertEqual(pivoted.index.name, "index")
        self.assertEqual(pivoted.columns.name, "columns")

        # don't specify values
        pivoted_all = frame.pivot(index="index", columns="columns")
        self.assertEqual(pivoted_all.index.name, "index")
        self.assertEqual(pivoted_all.columns.names, (None, "columns"))

        # pivot multiple columns
        long_panel = tm.makePanel().to_frame()
        flat = long_panel.reset_index()
        assert_frame_equal(flat.pivot("major", "minor"), long_panel.unstack())
Example #5
0
    def test_pivot_index_none(self):
        """Pivot with ``index=None`` must reuse the frame's existing index.

        Covers gh-3962 for both call shapes: with an explicit ``values``
        column (flat result columns) and with ``values`` omitted (result
        columns become a MultiIndex whose outer level is the value column).
        """
        # gh-3962
        data = {
            "index": ["A", "B", "C", "C", "B", "A"],
            "columns": ["One", "One", "One", "Two", "Two", "Two"],
            "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0],
        }

        # The labels to pivot on already live in the index, so ``index`` is
        # deliberately not passed to pivot below.
        frame = DataFrame(data).set_index("index")
        result = frame.pivot(columns="columns", values="values")
        expected = DataFrame({"One": {"A": 1.0, "B": 2.0, "C": 3.0}, "Two": {"A": 1.0, "B": 2.0, "C": 3.0}})

        expected.index.name, expected.columns.name = "index", "columns"
        assert_frame_equal(result, expected)

        # omit values
        result = frame.pivot(columns="columns")

        expected.columns = pd.MultiIndex.from_tuples([("values", "One"), ("values", "Two")], names=[None, "columns"])
        expected.index.name = "index"
        # Names are compared explicitly right after, so skip them here.
        assert_frame_equal(result, expected, check_names=False)
        self.assertEqual(result.index.name, "index")
        self.assertEqual(result.columns.names, (None, "columns"))

        # Drop the outer "values" level again and re-check the explicit
        # ``values`` path against the flattened expectation.
        expected.columns = expected.columns.droplevel(0)
        result = frame.pivot(columns="columns", values="values")

        expected.columns.name = "columns"
        assert_frame_equal(result, expected)
Example #6
0
    def test_pivot(self):
        """Pivot long data to wide; check values, names and Panel round trip."""
        index_labels = ['A', 'B', 'C', 'C', 'B', 'A']
        column_labels = ['One', 'One', 'One', 'Two', 'Two', 'Two']
        frame = DataFrame({'index': index_labels,
                           'columns': column_labels,
                           'values': [1., 2., 3., 3., 2., 1.]})

        expected = DataFrame({'One': {'A': 1., 'B': 2., 'C': 3.},
                              'Two': {'A': 1., 'B': 2., 'C': 3.}})
        expected.index.name = 'index'
        expected.columns.name = 'columns'

        pivoted = frame.pivot(index='index', columns='columns',
                              values='values')
        tm.assert_frame_equal(pivoted, expected)

        # name tracking
        assert pivoted.index.name == 'index'
        assert pivoted.columns.name == 'columns'

        # don't specify values
        pivoted_all = frame.pivot(index='index', columns='columns')
        assert pivoted_all.index.name == 'index'
        assert pivoted_all.columns.names == (None, 'columns')

        # Everything Panel-related stays inside the warning-recording context.
        with catch_warnings(record=True):
            # pivot multiple columns
            long_panel = tm.makePanel().to_frame()
            flat = long_panel.reset_index()
            tm.assert_frame_equal(flat.pivot('major', 'minor'),
                                  long_panel.unstack())
Example #7
0
    def test_pivot_index_none(self):
        # gh-3962
        data = {
            'index': ['A', 'B', 'C', 'C', 'B', 'A'],
            'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
            'values': [1., 2., 3., 3., 2., 1.]
        }

        frame = DataFrame(data).set_index('index')
        result = frame.pivot(columns='columns', values='values')
        expected = DataFrame({
            'One': {'A': 1., 'B': 2., 'C': 3.},
            'Two': {'A': 1., 'B': 2., 'C': 3.}
        })

        expected.index.name, expected.columns.name = 'index', 'columns'
        assert_frame_equal(result, expected)

        # omit values
        result = frame.pivot(columns='columns')

        expected.columns = pd.MultiIndex.from_tuples([('values', 'One'),
                                                      ('values', 'Two')],
                                                     names=[None, 'columns'])
        expected.index.name = 'index'
        tm.assert_frame_equal(result, expected, check_names=False)
        assert result.index.name == 'index'
        assert result.columns.names == (None, 'columns')
        expected.columns = expected.columns.droplevel(0)
        result = frame.pivot(columns='columns', values='values')

        expected.columns.name = 'columns'
        tm.assert_frame_equal(result, expected)
Example #8
0
    def test_pivot_index_with_nan(self):
        """Missing labels (NaN / NaT) must survive pivoting on either axis."""
        # GH 3588
        nan = np.nan
        source = DataFrame(
            {
                "a": ["R1", "R2", nan, "R4"],
                "b": ["C1", "C2", "C3", "C4"],
                "c": [10, 15, 17, 20],
            }
        )
        wide = source.pivot("a", "b", "c")
        expected_rows = [
            [nan, nan, 17, nan],
            [10, nan, nan, nan],
            [nan, 15, nan, nan],
            [nan, nan, nan, 20],
        ]
        expected = DataFrame(
            expected_rows,
            index=Index([nan, "R1", "R2", "R4"], name="a"),
            columns=Index(["C1", "C2", "C3", "C4"], name="b"),
        )
        tm.assert_frame_equal(wide, expected)
        # Swapping the axes must produce the transpose.
        tm.assert_frame_equal(source.pivot("b", "a", "c"), expected.T)

        # GH9491
        source = DataFrame(
            {
                "a": pd.date_range("2014-02-01", periods=6, freq="D"),
                "c": 100 + np.arange(6),
            }
        )
        source["b"] = source["a"] - pd.Timestamp("2014-02-02")
        # Knock out some datetime and timedelta labels.
        source.loc[1, "a"] = nan
        source.loc[3, "a"] = nan
        source.loc[1, "b"] = nan
        source.loc[4, "b"] = nan

        pivoted = source.pivot("a", "b", "c")
        # One populated cell per source row.
        self.assertEqual(pivoted.notnull().values.sum(), len(source))

        for _, record in source.iterrows():
            self.assertEqual(pivoted.loc[record["a"], record["b"]], record["c"])

        tm.assert_frame_equal(source.pivot("b", "a", "c"), pivoted.T)
Example #9
0
 def test_pivot_duplicates(self):
     """Pivot must refuse input with a repeated (index, column) pair."""
     # ("foo", "one") appears twice, so the reshape is ambiguous.
     records = [
         ("bar", "one", 1.0),
         ("bar", "two", 2.0),
         ("foo", "one", 3.0),
         ("foo", "one", 3.0),
         ("foo", "two", 4.0),
     ]
     data = DataFrame(records, columns=["a", "b", "c"])
     with assertRaisesRegexp(ValueError, "duplicate entries"):
         data.pivot("a", "b", "c")
Example #10
0
class Pivot(object):
    """Benchmark: pivot a long-format time-series frame into wide format."""

    def setup(self):
        """Build a long frame with 50 variables observed at N hourly stamps.

        The frame has ``N * 50`` rows and three columns (``value``,
        ``variable``, ``date``); each (date, variable) pair occurs once, so
        the pivot is well defined.
        """
        N = 10000
        index = date_range('1/1/2000', periods=N, freq='h')
        data = {'value': np.random.randn(N * 50),
                'variable': np.arange(50).repeat(N),
                'date': np.tile(index.values, 50)}
        self.df = DataFrame(data)

    def time_reshape_pivot_time_series(self):
        """Time ``DataFrame.pivot`` reshaping the long frame to wide."""
        # Keyword arguments work on every pandas version; positional pivot
        # arguments were deprecated in pandas 1.5 and removed in 2.0.
        self.df.pivot(index='date', columns='variable', values='value')
Example #11
0
 def test_pivot_index_with_nan(self):
     """A NaN label must be kept on whichever axis it is pivoted to."""
     # GH 3588
     nan = np.nan
     frame = DataFrame({'a': ['R1', 'R2', nan, 'R4'],
                        'b': ['C1', 'C2', 'C3', 'C4'],
                        'c': [10, 15, 17, 20]})
     wide = frame.pivot('a', 'b', 'c')

     row_labels = Index([nan, 'R1', 'R2', 'R4'], name='a')
     col_labels = Index(['C1', 'C2', 'C3', 'C4'], name='b')
     expected = DataFrame([[nan, nan, 17, nan],
                           [10, nan, nan, nan],
                           [nan, 15, nan, nan],
                           [nan, nan, nan, 20]],
                          index=row_labels,
                          columns=col_labels)
     tm.assert_frame_equal(wide, expected)
     # Swapping the axes must give the transpose.
     tm.assert_frame_equal(frame.pivot('b', 'a', 'c'), expected.T)
def print_survival_rate(df):
    """Plot per-algorithm survival rate against action duration.

    For every ``domainPath`` group in *df* the rows are grouped by
    (``algorithmName``, ``actionDuration``). A row counts as a success when
    its ``errorMessage`` is null, as a timeout when the message equals
    ``"Timeout"``, and as a death otherwise. The survival rate is
    ``successes / (successes + deaths)``; timeouts are left out of it.

    The error bars come from ``proportion_confint(successes, total_trials,
    0.05)``, whose denominator does include timeouts.

    Each domain's plot is written to ``test.png``, so with several domains
    only the last plot remains on disk.

    Args:
        df: frame with at least the columns ``domainPath``,
            ``algorithmName``, ``actionDuration`` and ``errorMessage``.
    """
    # Group by the scalar key: a one-element list key yields tuple group
    # labels on pandas >= 2.0.
    for _, domain_group in df.groupby("domainPath"):
        survival_results = DataFrame(columns="actionDuration algorithmName survival lbound rbound".split())

        for (algorithm, duration), action_group in domain_group.groupby(['algorithmName', 'actionDuration']):
            messages = action_group["errorMessage"]
            failed = messages.notnull()

            total_trials = len(action_group)
            successes = int((~failed).sum())
            deaths = int((failed & (messages != "Timeout")).sum())

            survival_confint = proportion_confint(successes, total_trials, 0.05)
            # A group made up solely of timeouts has no decided trial; report
            # NaN instead of dividing by zero.
            decided = successes + deaths
            survival_rate = successes / decided if decided else np.nan
            survival_results = add_row(survival_results,
                                       [duration, algorithm, survival_rate, survival_confint[0], survival_confint[1]])

        fig, ax = plt.subplots()
        errors = []
        for _, alg_group in survival_results.groupby('algorithmName'):
            survival_values = alg_group['survival'].values
            # Distance from the rate to each confidence bound.
            errors.append([alg_group['lbound'].values - survival_values,
                           alg_group['rbound'].values - survival_values])
        errors = np.abs(errors)
        print(errors)
        survival = survival_results.pivot(index='actionDuration', columns='algorithmName', values='survival')

        survival.plot(ax=ax, yerr=errors,
                      xlim=[0, 7000], ylim=[0, 1.0],
                      capsize=4, capthick=1, ecolor='black', cmap=plt.get_cmap("rainbow"), elinewidth=1)

        plt.savefig('test.png', format='png')
        # Release the figure so a long domain list does not pile up open
        # figures.
        plt.close(fig)
def _domain_name_from_path(domain_path):
    """Return the last component of *domain_path* without a ``.track`` suffix."""
    base_name = domain_path.rstrip("/").rsplit("/", 1)[-1]
    suffix = ".track"
    # str.rstrip(".track") would strip any trailing run of the characters
    # ". t r a c k" (e.g. "racetrack.track" -> "race"), so cut the suffix
    # explicitly instead.
    if base_name.endswith(suffix):
        base_name = base_name[:-len(suffix)]
    return base_name


def main():
    """Plot goal achievement time relative to A* for every domain.

    Reads ``../output/results.json``, drops the columns that are not needed,
    keeps only runs without an error message, and normalises each run's
    ``goalAchievementTime`` by the A* reference
    ``actionDuration * pathLength`` for the same domain, seed and action
    duration. For each domain it then plots the per-algorithm mean of that
    ratio against ``actionDuration`` with t-confidence-interval error bars
    and saves the figure as ``<domain name>.png``.
    """
    data = construct_data_frame(read_data("../output/results.json"))
    set_rc()

    data.drop(['commitmentType', "success", "timeLimit",
               "terminationType", 'timestamp', 'octileMovement', 'lookaheadType',
               'firstIterationDuration', 'generatedNodes', 'expandedNodes', 'domainInstanceName', 'domain_name',
               'planningTime'],
              axis=1,
              inplace=True,
              errors='ignore')

    # this is a fix for the traffic domain which does not have domainSeed values, so I have to fake it
    if 'domainSeed' not in data:
        data['domainSeed'] = data['domainPath']
        data['domainPath'] = 'vehicle'

    sns.set_style("white")

    # print_survival_rate(data)
    # Keep successful runs only. Filtering and sorting in one chain produces
    # a new frame instead of sorting a slice in place.
    data = data[data['errorMessage'].isnull()].sort_values(['domainPath', 'actionDuration'], ascending=True)

    # Explicit copy so that adding "opt" does not write into a view of data.
    astar = data[data["algorithmName"] == "A_STAR"].copy()
    astar["opt"] = astar["actionDuration"] * astar["pathLength"]
    astar = astar[["domainPath", "domainSeed", "opt", "actionDuration"]]
    data = pd.merge(data, astar, how='inner', on=['domainPath', 'domainSeed', 'actionDuration'])
    data["withinOpt"] = data["goalAchievementTime"] / data["opt"]

    # Group by the scalar key: a one-element list key yields tuple group
    # labels on pandas >= 2.0.
    for domain_path, domain_group in data.groupby("domainPath"):
        results = DataFrame(columns="actionDuration algorithmName withinOpt lbound rbound".split())
        domain_name = _domain_name_from_path(domain_path)

        for (algorithm, duration), action_group in domain_group.groupby(['algorithmName', 'actionDuration']):
            bound = sms.DescrStatsW(action_group["withinOpt"]).tconfint_mean()
            mean = action_group["withinOpt"].mean()
            # lbound / rbound are stored as distances from the mean.
            results = add_row(results, [duration, algorithm, mean, abs(mean - bound[0]), abs(mean - bound[1])])

        fig, ax = plt.subplots()
        errors = [[alg_group['lbound'].values, alg_group['rbound'].values]
                  for _, alg_group in results.groupby('algorithmName')]

        pivot = results.pivot(index='actionDuration', columns='algorithmName', values='withinOpt')
        plot = pivot.plot(ax=ax, yerr=errors,
                          capsize=4, capthick=1, ecolor='black', cmap=plt.get_cmap("rainbow"), elinewidth=1)
        plot.legend(title="Planners", shadow=True, frameon=True, framealpha=1.0, facecolor='lightgrey')

        format_plot(plot)
        plt.savefig(domain_name + ".png", format='png')
        # Release the figure before moving on to the next domain.
        plt.close(fig)
Example #14
0
def getAdjClosePrices(tickers, startdate, enddate, db="/Users/Felix/assetjet.db"):
    """Load adjusted close prices for *tickers* from the SQLite database.

    Args:
        tickers: iterable of ticker codes to load.
        startdate: inclusive lower bound for ``Date``.
        enddate: inclusive upper bound for ``Date``.
        db: path of the SQLite database file; defaults to the original
            hard-coded location.

    Returns:
        A ``(prices, seriesbegin)`` tuple. ``prices`` is a DataFrame indexed
        by date with one column per ticker holding ``AdjClose``.
        ``seriesbegin`` is a DataFrame indexed by ticker whose ``Date``
        column is the first date found for that ticker in the range.
    """
    # Open DB Connection, TODO: switch to SQLAlchemy
    conn = sqlite3.connect(db, detect_types=sqlite3.PARSE_DECLTYPES)
    try:
        cursor = conn.cursor()
        try:
            # Query: the requested tickers go into a temp table that the
            # price table is joined against.
            cursor.execute("""CREATE TEMP TABLE Tickers (Cd Text)""")
            cursor.executemany("""INSERT INTO Tickers VALUES(?)""",
                               ((ticker,) for ticker in tickers))

            cursor.execute(
                """SELECT ts.Cd, Date, AdjClose
                      FROM TimeSeries ts
                      INNER JOIN Tickers t ON ts.Cd = t.Cd
                      WHERE Date >= ? AND Date <= ?""",
                (startdate, enddate),
            )
            rows = cursor.fetchall()
            # The first entry of each description tuple is the column name.
            # (zip(...)[0] only worked on Python 2, where zip returned a list.)
            column_names = [column[0] for column in cursor.description]
        finally:
            cursor.close()
    finally:
        # Close the connection even when a query fails.
        conn.close()

    # Create a pandas DataFrame
    pricesRaw = DataFrame(rows, columns=column_names)
    pricesRaw["Date"] = pd.to_datetime(pricesRaw["Date"])  # convert date to datetime
    seriesbegin = pricesRaw[["Cd", "Date"]].groupby("Cd").min()
    # Pivot DataFrame (keyword arguments: positional ones were removed in
    # pandas 2.0)
    prices = pricesRaw.pivot(index="Date", columns="Cd", values="AdjClose")

    return prices, seriesbegin
Example #15
0
def plot_gat(data, plot_title, file_name):
    """Plot mean goal achievement time (factor of optimal) per algorithm.

    Rows of *data* are grouped by (``algorithmName``, ``actionDuration``);
    for each group the mean of ``withinOpt`` and a z-confidence interval
    around it are computed. The means are pivoted to one column per
    algorithm, drawn with error bars, and saved to
    ``../results/plots/<file_name>.pdf``.

    NOTE: *data* is modified in place. Its ``algorithmName`` column is
    overwritten with ``algorithmLabel`` and ``actionDuration`` is divided
    by 1000000, so calling this twice on the same frame rescales twice.

    Args:
        data: frame with the columns ``algorithmLabel``, ``actionDuration``
            and ``withinOpt``.
        plot_title: title drawn on the plot.
        file_name: name of the PDF file, without extension.
    """
    print(f'Data to plot: {data}')
    # Item assignment always creates or overwrites the column. Attribute
    # assignment would only set a Python attribute if the column was missing.
    data['algorithmName'] = data['algorithmLabel']
    results = DataFrame(
        columns="actionDuration withinOpt algorithmName lbound rbound".split())

    # rescale action durations to ms
    data['actionDuration'] = data['actionDuration'] / 1000000

    # Change data structure such that goal achievement time is averaged,
    # grouped by action duration and algorithm
    for (alg_name, duration), duration_group in data.groupby(
            ['algorithmName', 'actionDuration']):
        # Use the mapped display name when alg_map has one for this algorithm.
        if alg_name in alg_map:
            alg_name = alg_map[alg_name]

        # Get mean of within optimal calculation, add row to results dataframe
        mean_within_opt = duration_group['withinOpt'].mean()
        within_opt_list = list(duration_group['withinOpt'])
        bound = sms.DescrStatsW(within_opt_list).zconfint_mean()
        # lbound / rbound are stored as distances from the mean.
        results = add_row(results,
                          [duration, mean_within_opt, alg_name,
                           abs(mean_within_opt - bound[0]),
                           abs(mean_within_opt - bound[1])])

    errors = [[alg_group['lbound'].values, alg_group['rbound'].values]
              for _, alg_group in results.groupby('algorithmName')]

    pivot = results.pivot(index="actionDuration", columns="algorithmName",
                          values="withinOpt")
    pivot = pivot[~pivot.index.duplicated(keep='first')]

    # Below is palette of distinguishable colors for analyzing large sets of algorithms together
    # colors = ["#90C3D4", "#C390D4", "#D4A190", "#A1D490", "#AB3299", "#AB8132", "#32AB44","#325DAB","#9BAB32", "#32AB7E","#4232AB","#AB325F","#495E49","#49545E","#5E495E", "#5E5449","#FA7887","#C8FA78","#78FAEB","#AA78FA"]
    palette = sns.color_palette(n_colors=10)
    plot = pivot.plot(color=palette, title=plot_title, legend=True, yerr=errors,
                      ecolor='black', elinewidth=1,
                      capsize=4, capthick=1)

    # plot.set_xscale('log')
    # plot.set_yscale('log')

    # plot.set_xticks([50, 100, 150, 250, 500, 1000, 2000, 3200])
    # plot.set_yticks([1, 1.1, 1.5, 2])
    # plot.set_ylim([1, 1.4])
    plot.get_xaxis().set_major_formatter(mpl.ticker.ScalarFormatter())

    plot.get_yaxis().set_major_formatter(mpl.ticker.ScalarFormatter())

    plot.set_xlabel('Planning Time per Iteration (milliseconds)')
    plot.set_ylabel('Goal Achievement Time (Factor of Optimal)')
    plot.legend(title="")

    # The context manager closes the PDF even if saving fails.
    with PdfPages("../results/plots/" + file_name + ".pdf") as pdf:
        plt.savefig(pdf, format='pdf')
Example #16
0
    def test_pivot_with_tz(self):
        # GH 5878
        df = DataFrame(
            {
                "dt1": [
                    datetime(2013, 1, 1, 9, 0),
                    datetime(2013, 1, 2, 9, 0),
                    datetime(2013, 1, 1, 9, 0),
                    datetime(2013, 1, 2, 9, 0),
                ],
                "dt2": [
                    datetime(2014, 1, 1, 9, 0),
                    datetime(2014, 1, 1, 9, 0),
                    datetime(2014, 1, 2, 9, 0),
                    datetime(2014, 1, 2, 9, 0),
                ],
                "data1": np.arange(4, dtype="int64"),
                "data2": np.arange(4, dtype="int64"),
            }
        )

        df["dt1"] = df["dt1"].apply(lambda d: pd.Timestamp(d, tz="US/Pacific"))
        df["dt2"] = df["dt2"].apply(lambda d: pd.Timestamp(d, tz="Asia/Tokyo"))

        exp_col1 = Index(["data1", "data1", "data2", "data2"])
        exp_col2 = pd.DatetimeIndex(["2014/01/01 09:00", "2014/01/02 09:00"] * 2, name="dt2", tz="Asia/Tokyo")
        exp_col = pd.MultiIndex.from_arrays([exp_col1, exp_col2])
        expected = DataFrame(
            [[0, 2, 0, 2], [1, 3, 1, 3]],
            index=pd.DatetimeIndex(["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"),
            columns=exp_col,
        )

        pv = df.pivot(index="dt1", columns="dt2")
        tm.assert_frame_equal(pv, expected)

        expected = DataFrame(
            [[0, 2], [1, 3]],
            index=pd.DatetimeIndex(["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"),
            columns=pd.DatetimeIndex(["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo"),
        )

        pv = df.pivot(index="dt1", columns="dt2", values="data1")
        tm.assert_frame_equal(pv, expected)
Example #17
0
    def test_pivot_with_tz(self):
        # GH 5878
        df = DataFrame({'dt1': [datetime(2013, 1, 1, 9, 0),
                                datetime(2013, 1, 2, 9, 0),
                                datetime(2013, 1, 1, 9, 0),
                                datetime(2013, 1, 2, 9, 0)],
                        'dt2': [datetime(2014, 1, 1, 9, 0),
                                datetime(2014, 1, 1, 9, 0),
                                datetime(2014, 1, 2, 9, 0),
                                datetime(2014, 1, 2, 9, 0)],
                        'data1': np.arange(4, dtype='int64'),
                        'data2': np.arange(4, dtype='int64')})

        df['dt1'] = df['dt1'].apply(lambda d: pd.Timestamp(d, tz='US/Pacific'))
        df['dt2'] = df['dt2'].apply(lambda d: pd.Timestamp(d, tz='Asia/Tokyo'))

        exp_col1 = Index(['data1', 'data1', 'data2', 'data2'])
        exp_col2 = pd.DatetimeIndex(['2014/01/01 09:00',
                                     '2014/01/02 09:00'] * 2,
                                    name='dt2', tz='Asia/Tokyo')
        exp_col = pd.MultiIndex.from_arrays([exp_col1, exp_col2])
        expected = DataFrame([[0, 2, 0, 2], [1, 3, 1, 3]],
                             index=pd.DatetimeIndex(['2013/01/01 09:00',
                                                     '2013/01/02 09:00'],
                                                    name='dt1',
                                                    tz='US/Pacific'),
                             columns=exp_col)

        pv = df.pivot(index='dt1', columns='dt2')
        tm.assert_frame_equal(pv, expected)

        expected = DataFrame([[0, 2], [1, 3]],
                             index=pd.DatetimeIndex(['2013/01/01 09:00',
                                                     '2013/01/02 09:00'],
                                                    name='dt1',
                                                    tz='US/Pacific'),
                             columns=pd.DatetimeIndex(['2014/01/01 09:00',
                                                       '2014/01/02 09:00'],
                                                      name='dt2',
                                                      tz='Asia/Tokyo'))

        pv = df.pivot(index='dt1', columns='dt2', values='data1')
        tm.assert_frame_equal(pv, expected)
Example #18
0
    def test_pivot_periods(self):
        df = DataFrame(
            {
                "p1": [
                    pd.Period("2013-01-01", "D"),
                    pd.Period("2013-01-02", "D"),
                    pd.Period("2013-01-01", "D"),
                    pd.Period("2013-01-02", "D"),
                ],
                "p2": [
                    pd.Period("2013-01", "M"),
                    pd.Period("2013-01", "M"),
                    pd.Period("2013-02", "M"),
                    pd.Period("2013-02", "M"),
                ],
                "data1": np.arange(4, dtype="int64"),
                "data2": np.arange(4, dtype="int64"),
            }
        )

        exp_col1 = Index(["data1", "data1", "data2", "data2"])
        exp_col2 = pd.PeriodIndex(["2013-01", "2013-02"] * 2, name="p2", freq="M")
        exp_col = pd.MultiIndex.from_arrays([exp_col1, exp_col2])
        expected = DataFrame(
            [[0, 2, 0, 2], [1, 3, 1, 3]],
            index=pd.PeriodIndex(["2013-01-01", "2013-01-02"], name="p1", freq="D"),
            columns=exp_col,
        )

        pv = df.pivot(index="p1", columns="p2")
        tm.assert_frame_equal(pv, expected)

        expected = DataFrame(
            [[0, 2], [1, 3]],
            index=pd.PeriodIndex(["2013-01-01", "2013-01-02"], name="p1", freq="D"),
            columns=pd.PeriodIndex(["2013-01", "2013-02"], name="p2", freq="M"),
        )

        pv = df.pivot(index="p1", columns="p2", values="data1")
        tm.assert_frame_equal(pv, expected)
Example #19
0
def GetPricngData():
    """Return average prices per category and store as a JSON string.

    Runs the pricing query through ``ExecuteQuery``, pivots the rows so that
    each "Top Level Category" is a row and each "Store" a column holding
    ``CompetitorsPrice``, and replaces missing cells with 0.

    Returns:
        A JSON object with the keys ``index``, ``columns`` and ``values``.
        If anything fails, the exception's message is returned instead as a
        plain (non-JSON) string.
    """
    try:
        query = "SELECT DISTINCT a.Store AS Store,a.`Top Level Category`,ROUND(AVG(a.Price),2) as CompetitorsPrice, ROUND(b.MyStorePrice,2) as MyStorePrice FROM pricing  a JOIN (SELECT DISTINCT `Top Level Category` AS category,AVG(Price) AS MyStorePrice FROM pricing WHERE Store = 'My Store' GROUP BY Store,`Top Level Category`)  b ON a.`Top Level Category` = b.category GROUP BY a.Store,a.`Top Level Category`"
        result = list(ExecuteQuery(query))
        df = DataFrame(result)
        pi = df.pivot(index="Top Level Category", columns="Store", values="CompetitorsPrice")
        # A category/store pair without rows yields NaN; report it as 0.
        pi = pi.fillna(0)
        pricingResult = {
            "index": pi.index.tolist(),
            "columns": pi.columns.tolist(),
            "values": pi.values.tolist(),
        }
        return json.dumps(pricingResult)
    except Exception as e:
        # Exceptions have no ``.message`` attribute on Python 3; str(e)
        # gives the message on both Python 2 and 3.
        return str(e)
         ,['mercredi','temperature',28]
         ,['mercredi','ensoleillement',4]
         ,['mercredi','pollution',5]
         ,['mercredi','pluie',100]
         ,['jeudi','temperature',28]
         ,['jeudi','ensoleillement',4]
         ,['jeudi','pollution',5]
         ,['jeudi','pluie',100]
         ,['vendredi','temperature',28]
         ,['vendredi','ensoleillement',4]
         ,['vendredi','pollution',5]
         ,['vendredi','pluie',100]
         ]

# Long (day, attribute, value) readings reshaped to one row per day and one
# column per attribute. Keyword arguments: positional pivot arguments were
# removed in pandas 2.0.
cities_data = DataFrame(releves, columns=['jour', 'attribute', 'value'])
cities_data.pivot(index='jour', columns='attribute', values='value')


aliments = pd.read_csv('aliments.csv', sep='\t')

# Keep only the rows that declare traces (.loc replaces the removed .ix).
aliments_with_traces = aliments.loc[aliments.traces.dropna().index]
traces_iter = (set(x.split(',')) for x in aliments_with_traces['traces'])
traces = set.union(*traces_iter)
# One indicator column per distinct trace. The set is sorted because a set
# has no stable order and recent pandas rejects it as ``columns``.
dummies = DataFrame(np.zeros((len(aliments_with_traces), len(traces))), columns=sorted(traces))

# dummies has a default 0..n-1 index, so the enumerate counter is its row label.
for i, tr in enumerate(aliments_with_traces.traces):
    dummies.loc[i, tr.split(',')] = 1


# Quantile-based bins (equal counts) versus equal-width bins of the energy.
pd.qcut(aliments[u'energy_100g'].dropna(), 5).value_counts()
pd.cut(aliments[u'energy_100g'].dropna(), 5).value_counts()
Example #21
0
    def test_pivot_integer_bug(self):
        df = DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")])

        result = df.pivot(index=1, columns=0, values=2)
        repr(result)
        tm.assert_index_equal(result.columns, Index(['A', 'B'], name=0))
Example #22
0
 def test_pivot_empty(self):
     """Pivoting a frame without rows must give an empty frame."""
     empty = DataFrame({}, columns=['a', 'b', 'c'])
     pivoted = empty.pivot('a', 'b', 'c')
     # Axis names are not compared.
     tm.assert_frame_equal(pivoted, DataFrame({}), check_names=False)
Example #23
0
 def test_pivot_duplicates(self):
     """A repeated (index, column) pair must make pivot raise ValueError."""
     # ('foo', 'one') occurs twice.
     records = [('bar', 'one', 1.), ('bar', 'two', 2.), ('foo', 'one', 3.),
                ('foo', 'one', 3.), ('foo', 'two', 4.)]
     data = DataFrame(records, columns=['a', 'b', 'c'])
     with tm.assert_raises_regex(ValueError, 'duplicate entries'):
         data.pivot('a', 'b', 'c')
mergeM = pd.merge(sales, bonus, on='ID')                          # A many-to-many join
stack = pd.concat([employee, sales], ignore_index=True)           # Vertical stacking

#############################################################################################################
# 8. Reshaping & Pivoting
#############################################################################################################
df1 = DataFrame([['Big', 'LAX', 3, np.nan], ['Big', 'SFO', 6, 7], ['Med', 'SEA-TAC', 9, np.nan], ['Small', 'POR', np.nan, np.nan]],
                index=pd.Index(['LA', 'SF', 'SEA', 'POR']),
                columns=pd.Index(['Type', 'Airport', 'Cool Factor', 'D']))

# .stack() / .unstack(): move the column labels into a hierarchical index
df2 = df1.stack(dropna=False)                      # converts columns into the child index
df3 = df1.unstack()                                # converts columns into the parent index

# .pivot(index, columns, values) is used to reshape data like dplyr in R.
# Keyword arguments are used because positional ones were removed in pandas 2.0.
df4 = df1.pivot(index='Airport', columns='Type', values='Cool Factor')    # yes! it's that easy to reshape!

#############################################################################################################
# 9. Outlier Analysis
#############################################################################################################
np.random.seed(12345)
df = DataFrame(np.random.randn(1000, 4))
df.describe()                                        # assume outliers are in the -+3 region

df[0][np.abs(df[0]) > 3]                             # show all values in column 0 that are > abs(3)
df[(np.abs(df) > 3).any(axis=1)]                     # show all rows with at least one value > abs(3)
df[np.abs(df) > 3] = np.sign(df) * 3                 # caps all values > abs(3) to +-3 via .sign()

#############################################################################################################
# 10. Binning Data
#############################################################################################################
Example #25
0
# Per-column fill values for missing data
replacements = {'one': -1, 'two': -2}
df.fillna(value=replacements)

# Sorting: DataFrame.sort was removed from pandas; sort_values(by=...) is
# the replacement.
df = DataFrame(array([[1, 3], [1, 2], [3, 2], [2, 1]]), columns=['one', 'two'])
df.sort_values(by='one')
df.sort_values(by=['one', 'two'])
df.sort_values(by=['one', 'two'], ascending=[False, True])

# Pivot long (ticker, price, date) rows into one price column per ticker
prices = [101.0, 102.0, 103.0]
tickers = ['GOOG', 'AAPL']
data = list(itertools.product(tickers, prices))
dates = pandas.date_range('2013-01-03', periods=3)
df = DataFrame(data, columns=['ticker', 'price'])
df['dates'] = dates.append(dates)
df
df.pivot(index='dates', columns='ticker', values='price')

# Reindexing: reindex_axis was removed from pandas; reindex(columns=...)
# reorders the columns.
original = DataFrame([[1, 1], [2, 2], [3.0, 3]], index=['a', 'b', 'c'], columns=['one', 'two'])
original.reindex(index=['b', 'c', 'd'])
different = DataFrame([[1, 1], [2, 2], [3.0, 3]], index=['c', 'd', 'e'], columns=['one', 'two'])
original.reindex_like(different)
original.reindex(columns=['two', 'one'])

# Merging on a shared key with each join type
left = DataFrame([[1, 2], [3, 4], [5, 6]], columns=['one', 'two'])
right = DataFrame([[1, 2], [3, 4], [7, 8]], columns=['one', 'three'])
left.merge(right, on='one')  # Same as how='inner'
left.merge(right, on='one', how='left')
left.merge(right, on='one', how='right')
left.merge(right, on='one', how='outer')

left = DataFrame([[1, 2], [3, 4], [5, 6]], columns=['one', 'two'])
Example #26
0
 def test_pivot_duplicates(self):
     """Pivot must raise ValueError for a repeated (index, column) pair."""
     columns = ['a', 'b', 'c']
     # The third and fourth records share the key ('foo', 'one').
     records = [('bar', 'one', 1.),
                ('bar', 'two', 2.),
                ('foo', 'one', 3.),
                ('foo', 'one', 3.),
                ('foo', 'two', 4.)]
     data = DataFrame(records, columns=columns)
     with pytest.raises(ValueError, match='duplicate entries'):
         data.pivot('a', 'b', 'c')
Example #27
0
 def test_pivot_empty(self):
     """An empty frame must pivot to an empty frame."""
     source = DataFrame({}, columns=["a", "b", "c"])
     pivoted = source.pivot("a", "b", "c")
     # Axis names are not compared.
     assert_frame_equal(pivoted, DataFrame({}), check_names=False)
# Per-column fill values for missing data
replacements = {"one": -1, "two": -2}
df.fillna(value=replacements)

# Sorting: DataFrame.sort was removed from pandas; sort_values(by=...) is
# the replacement.
df = DataFrame(array([[1, 3], [1, 2], [3, 2], [2, 1]]), columns=["one", "two"])
df.sort_values(by="one")
df.sort_values(by=["one", "two"])
df.sort_values(by=["one", "two"], ascending=[False, True])

# Pivot long (ticker, price, date) rows into one price column per ticker
prices = [101.0, 102.0, 103.0]
tickers = ["GOOG", "AAPL"]
data = list(itertools.product(tickers, prices))
dates = pandas.date_range("2013-01-03", periods=3)
df = DataFrame(data, columns=["ticker", "price"])
df["dates"] = dates.append(dates)
df
df.pivot(index="dates", columns="ticker", values="price")

# Reindexing: reindex_axis was removed from pandas; reindex(columns=...)
# reorders the columns.
original = DataFrame([[1, 1], [2, 2], [3.0, 3]], index=["a", "b", "c"], columns=["one", "two"])
original.reindex(index=["b", "c", "d"])
different = DataFrame([[1, 1], [2, 2], [3.0, 3]], index=["c", "d", "e"], columns=["one", "two"])
original.reindex_like(different)
original.reindex(columns=["two", "one"])

# Merging on a shared key with each join type
left = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["one", "two"])
right = DataFrame([[1, 2], [3, 4], [7, 8]], columns=["one", "three"])
left.merge(right, on="one")  # Same as how='inner'
left.merge(right, on="one", how="left")
left.merge(right, on="one", how="right")
left.merge(right, on="one", how="outer")

left = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["one", "two"])
Example #29
0
s2 = Series([4, 5, 6], index=['c', 'd', 'e'])
# Concatenate the two series under the outer keys 'one' and 'two'
data2 = pd.concat([s1, s2], keys=['one', 'two'])

# print data2
# print data2.unstack()
# print data2.unstack().stack()
# print data2.unstack().stack(dropna=False)

df = DataFrame({'left': result, 'right': result + 5}, columns=pd.Index(['left', 'right'], name='side'))
# print df
# print df.unstack('state')
# print df.unstack('state').stack('side')

# Read the long-format sheet; the context manager closes the workbook handle.
with pd.ExcelFile(r'C:\Users\Administrator\Desktop\data.xlsx') as xls_file:
    ldata = DataFrame(xls_file.parse('Sheet1'))
# Keyword arguments: positional pivot arguments were removed in pandas 2.0.
pivoted = ldata.pivot(index='date', columns='item', values='value')
# print ldata[:10]
# print pivoted.head()

ldata["value2"] = np.random.randn(len(ldata))
# print ldata[:10]

# Without ``values`` every remaining column is pivoted, giving hierarchical
# columns.
pivoted = ldata.pivot(index='date', columns='item')
# print pivoted[:5]
# print pivoted['value'][:3]

# The same reshape written with set_index + unstack
unstacked = ldata.set_index(['date', 'item']).unstack('item')
# print unstacked[:7]

data = DataFrame({'k1': ['one'] * 3 + ['two'] * 4, 'k2': [1, 1, 2, 3, 3, 4, 4]})
# print data
# print data
Example #30
0
    def test_pivot_integer_bug(self):
        """Pivot must accept integer column labels for every argument."""
        rows = [("A", "1", "A1"), ("B", "2", "B2")]
        frame = DataFrame(data=rows)

        pivoted = frame.pivot(index=1, columns=0, values=2)
        # Rendering the result must not raise.
        repr(pivoted)
        self.assert_numpy_array_equal(pivoted.columns, ['A', 'B'])