Example #1
def main():

    logger = get_root_logger()
    get_header(logger, "LOADING PROJECTIONS")

    client = APIClient()

    # grab dataframe shape from a trial run
    data = client.get_data("weekly-projections", "json", "QB")
    test_df = json_normalize(data["Projections"])

    # get DF structure from columns in test_df
    cols = test_df.columns
    df = DataFrame(columns=cols)

    # grab current week
    current_week = test_df.week.values[0]

    # loop through all weeks up to current week
    for wk in [str(x) for x in range(int(current_week))]:
        logger.info("Processing projections for week {0}".format(int(wk) + 1))
        # loop through all positions
        for pos in ["QB", "RB", "WR", "TE", "K", "DEF"]:
            tmp_data = client.get_data("weekly-projections", "json", pos, wk)
            tmp_df = json_normalize(tmp_data["Projections"])
            df = df.append(tmp_df)

    # import this df directly to PG DB
    conn = DBClient()
    conn.load(df, "projections", schema="raw", if_exists="replace")
Example #2
    def test_to_excel_unicode_filename(self):
        _skip_if_no_excelsuite()

        for ext in ["xls", "xlsx"]:
            filename = u"\u0192u." + ext

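            # skip this test when the filesystem cannot encode a unicode filename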
            try:
                f = open(filename, "wb")
            except UnicodeEncodeError:
                raise nose.SkipTest("no unicode file names on this system")
            else:
                f.close()

            df = DataFrame(
                [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
                index=["A", "B"],
                columns=["X", "Y", "Z"],
            )

            with ensure_clean(filename) as filename:
                df.to_excel(filename, "test1", float_format="%.2f")

                reader = ExcelFile(filename)
                rs = reader.parse("test1", index_col=None)
                xp = DataFrame(
                    [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]], index=["A", "B"], columns=["X", "Y", "Z"]
                )
                tm.assert_frame_equal(rs, xp)
Example #3
    def test_wls_panel(self):
        y = tm.makeTimeDataFrame()
        x = Panel({"x1": tm.makeTimeDataFrame(), "x2": tm.makeTimeDataFrame()})

        y.ix[[1, 7], "A"] = np.nan
        y.ix[[6, 15], "B"] = np.nan
        y.ix[[3, 20], "C"] = np.nan
        y.ix[[5, 11], "D"] = np.nan

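        # stack the panel and weights into long format so the panel and stacked regressions can be compared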
        stack_y = y.stack()
        stack_x = DataFrame(dict((k, v.stack()) for k, v in compat.iteritems(x)))

        weights = x.std("items")
        stack_weights = weights.stack()

        stack_y.index = stack_y.index._tuple_index
        stack_x.index = stack_x.index._tuple_index
        stack_weights.index = stack_weights.index._tuple_index

        result = ols(y=y, x=x, weights=1 / weights)
        expected = ols(y=stack_y, x=stack_x, weights=1 / stack_weights)

        assert_almost_equal(result.beta, expected.beta)

        for attr in ["resid", "y_fitted"]:
            rvals = getattr(result, attr).stack().values
            evals = getattr(expected, attr).values
            assert_almost_equal(rvals, evals)
Example #4
    def test_join_aware(self):
        rng = date_range("1/1/2011", periods=10, freq="H")
        ts = Series(np.random.randn(len(rng)), index=rng)

        ts_utc = ts.tz_localize("utc")

        self.assertRaises(Exception, ts.__add__, ts_utc)
        self.assertRaises(Exception, ts_utc.__add__, ts)

        test1 = DataFrame(
            np.zeros((6, 3)), index=date_range("2012-11-15 00:00:00", periods=6, freq="100L", tz="US/Central")
        )
        test2 = DataFrame(
            np.zeros((3, 3)),
            index=date_range("2012-11-15 00:00:00", periods=3, freq="250L", tz="US/Central"),
            columns=lrange(3, 6),
        )

        result = test1.join(test2, how="outer")
        ex_index = test1.index.union(test2.index)

        self.assertTrue(result.index.equals(ex_index))
        self.assertTrue(result.index.tz.zone == "US/Central")

        # non-overlapping
        rng = date_range("2012-11-15 00:00:00", periods=6, freq="H", tz="US/Central")

        rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="H", tz="US/Eastern")

        result = rng.union(rng2)
        self.assertTrue(result.tz.zone == "UTC")
Example #5
    def test_passing_dtype(self):
        # see gh-6607
        df = DataFrame(np.random.rand(5, 2).round(4), columns=list("AB"), index=["1A", "1B", "1C", "1D", "1E"])

        with tm.ensure_clean("__passing_str_as_dtype__.csv") as path:
            df.to_csv(path)

            # see gh-3795: passing 'str' as the dtype
            result = self.read_csv(path, dtype=str, index_col=0)
            expected = df.astype(str)
            tm.assert_frame_equal(result, expected)

            # for parsing, interpret object as str
            result = self.read_csv(path, dtype=object, index_col=0)
            tm.assert_frame_equal(result, expected)

            # we expect all object columns, so need to
            # convert to test for equivalence
            result = result.astype(float)
            tm.assert_frame_equal(result, df)

            # invalid dtype
            self.assertRaises(TypeError, self.read_csv, path, dtype={"A": "foo", "B": "float64"}, index_col=0)

        # see gh-12048: empty frame
        actual = self.read_csv(StringIO("A,B"), dtype=str)
        expected = DataFrame({"A": [], "B": []}, index=[], dtype=str)
        tm.assert_frame_equal(actual, expected)
Example #6
def test():
    """DataFrame editor test"""
    from numpy import nan
    from pandas.util.testing import assert_frame_equal, assert_series_equal

    df1 = DataFrame(
        [
            [True, "bool"],
            [1 + 1j, "complex"],
            ["test", "string"],
            [1.11, "float"],
            [1, "int"],
            [np.random.rand(3, 3), "Unknown type"],
            ["Large value", 100],
            ["áéí", "unicode"],
        ],
        index=["a", "b", nan, nan, nan, "c", "Test global max", "d"],
        columns=[nan, "Type"],
    )
    out = test_edit(df1)
    assert_frame_equal(df1, out)

    result = Series([True, "bool"], index=[nan, "Type"], name="a")
    out = test_edit(df1.iloc[0])
    assert_series_equal(result, out)

    # Sorting large DataFrame takes time
    df1 = DataFrame(np.random.rand(100100, 10))
    df1.sort(columns=[0, 1], inplace=True)
    out = test_edit(df1)
    assert_frame_equal(out, df1)

    series = Series(np.arange(10), name=0)
    out = test_edit(series)
    assert_series_equal(series, out)
Example #7
    def predict(self, prediction_data):
        preds = DataFrame(prediction_data)
        ret = []
        for index, data in preds.iterrows():
            ret.append(mean(data))
        return ret
Example #8
    def test_resample_anchored_intraday(self):
        # #1471, #1458

        rng = date_range("1/1/2012", "4/1/2012", freq="100min")
        df = DataFrame(rng.month, index=rng)

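        # monthly timestamp resampling should match period resampling converted back to end-of-period timestamps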
        result = df.resample("M")
        expected = df.resample("M", kind="period").to_timestamp(how="end")
        tm.assert_frame_equal(result, expected)

        result = df.resample("M", closed="left")
        exp = df.tshift(1, freq="D").resample("M", kind="period")
        exp = exp.to_timestamp(how="end")

        tm.assert_frame_equal(result, exp)

        rng = date_range("1/1/2012", "4/1/2012", freq="100min")
        df = DataFrame(rng.month, index=rng)

        result = df.resample("Q")
        expected = df.resample("Q", kind="period").to_timestamp(how="end")
        tm.assert_frame_equal(result, expected)

        result = df.resample("Q", closed="left")
        expected = df.tshift(1, freq="D").resample("Q", kind="period", closed="left")
        expected = expected.to_timestamp(how="end")
        tm.assert_frame_equal(result, expected)

        ts = _simple_ts("2012-04-29 23:00", "2012-04-30 5:00", freq="h")
        resampled = ts.resample("M")
        self.assertEqual(len(resampled), 1)
Example #9
    def test_frame_non_unique_columns(self):
        df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "x"])

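        # duplicate column names can only round-trip through the 'split' and 'values' orients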
        self.assertRaises(ValueError, df.to_json, orient="index")
        self.assertRaises(ValueError, df.to_json, orient="columns")
        self.assertRaises(ValueError, df.to_json, orient="records")

        assert_frame_equal(df, read_json(df.to_json(orient="split"), orient="split", dtype=False))
        unser = read_json(df.to_json(orient="values"), orient="values")
        np.testing.assert_equal(df.values, unser.values)

        # GH4377; duplicate columns not processing correctly
        df = DataFrame([["a", "b"], ["c", "d"]], index=[1, 2], columns=["x", "y"])
        result = read_json(df.to_json(orient="split"), orient="split")
        assert_frame_equal(result, df)

        def _check(df):
            result = read_json(df.to_json(orient="split"), orient="split", convert_dates=["x"])
            assert_frame_equal(result, df)

        for o in [
            [["a", "b"], ["c", "d"]],
            [[1.5, 2.5], [3.5, 4.5]],
            [[1, 2.5], [3, 4.5]],
            [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]],
        ]:
            _check(DataFrame(o, index=[1, 2], columns=["x", "x"]))
Example #10
    def test_partially_invalid_plot_data(self):
        kinds = "line", "bar", "barh", "kde", "density"
        df = DataFrame(randn(10, 2), dtype=object)
        df[np.random.rand(df.shape[0]) > 0.5] = "a"
        for kind in kinds:
            with tm.assertRaises(TypeError):
                df.plot(kind=kind)
Example #11
    def test_to_string_float_index(self):
        index = Index([1.5, 2, 3, 4, 5])
        df = DataFrame(range(5), index=index)

        result = df.to_string()
        expected = "     0\n" "1.5  0\n" "2    1\n" "3    2\n" "4    3\n" "5    4"
        self.assertEqual(result, expected)
Example #12
    def test_to_string_no_index(self):
        df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})

        df_s = df.to_string(index=False)
        expected = " x  y\n 1  4\n 2  5\n 3  6"

        assert df_s == expected
Example #13
    def test_to_string_no_header(self):
        df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})

        df_s = df.to_string(header=False)
        expected = "0  1  4\n1  2  5\n2  3  6"

        assert df_s == expected
Example #14
def cite_frame_from_wos(wos_fnam, pubdate=None, randomize_ambiguity=False, max_time=None, trim_end=None):
    wos = Wos_h5_reader(wos_fnam)

    def fix_spans(d):
        # apply the month_replacements substitutions to the raw date string
        for k in month_replacements.keys():
            d = d.replace(k, month_replacements[k])
        return d

    dates = [" ".join(x) for x in zip((row['pubdate'].decode('utf-8') for row in wos.h5.root.papers),
                                      (str(row['pubyear']) for row in wos.h5.root.papers))]
    # there's a chance that pubyear could be -1 if the original data
    # didn't have a number for pubyear, which would leave that data in error; not sure how to handle it yet
    cites = Series([1] * len(dates))
    #sdates = Series([pandas.lib.Timestamp(fix_spans(x)) for x in dates])
    sdates = Series([date_to_number(fix_spans(x), randomize_ambiguity) for x in dates])
    df = DataFrame({'dates': sdates, 'cites': cites})
    df2 = df.groupby('dates').sum()
    if pubdate is None:
        start_date = df2.index[0]
    else:
        start_date = pandas.lib.Timestamp(pubdate)
    # convert the index to years since start_date
    df2.index = Series(df2.index).apply(lambda x: float(x - start_date) / 365)
    #df2.index = Series(df2.index).apply(lambda x: float((x - start_date).days)/365)
    df2['cumsum'] = df2.cites.cumsum()
    if max_time:
        return df2[df2.index < max_time]
    if trim_end:
        return df2[df2.index < max(df2.index) - trim_end]
    return df2
Example #15
    def test_errorbar_plot(self):

        d = {"x": np.arange(12), "y": np.arange(12, 0, -1)}
        df = DataFrame(d)
        d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4}
        df_err = DataFrame(d_err)

        # check line plots
        _check_plot_works(df.plot, yerr=df_err, logy=True)
        _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True)

        kinds = ["line", "bar", "barh"]
        for kind in kinds:
            _check_plot_works(df.plot, yerr=df_err["x"], kind=kind)
            _check_plot_works(df.plot, yerr=d_err, kind=kind)
            _check_plot_works(df.plot, yerr=df_err, xerr=df_err, kind=kind)
            _check_plot_works(df.plot, yerr=df_err["x"], xerr=df_err["x"], kind=kind)
            _check_plot_works(df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind)

        _check_plot_works((df + 1).plot, yerr=df_err, xerr=df_err, kind="bar", log=True)

        # yerr is raw error values
        _check_plot_works(df["y"].plot, yerr=np.ones(12) * 0.4)
        _check_plot_works(df.plot, yerr=np.ones((2, 12)) * 0.4)

        # yerr is column name
        df["yerr"] = np.ones(12) * 0.2
        _check_plot_works(df.plot, y="y", x="x", yerr="yerr")

        with tm.assertRaises(ValueError):
            df.plot(yerr=np.random.randn(11))

        df_err = DataFrame({"x": ["zzz"] * 12, "y": ["zzz"] * 12})
        with tm.assertRaises(TypeError):
            df.plot(yerr=df_err)
Example #16
    def test_grouped_hist(self):
        import matplotlib.pyplot as plt

        df = DataFrame(randn(500, 2), columns=["A", "B"])
        df["C"] = np.random.randint(0, 4, 500)
        axes = plotting.grouped_hist(df.A, by=df.C)
        self.assert_(len(axes.ravel()) == 4)

        plt.close("all")
        axes = df.hist(by=df.C)
        self.assert_(axes.ndim == 2)
        self.assert_(len(axes.ravel()) == 4)

        for ax in axes.ravel():
            self.assert_(len(ax.patches) > 0)

        plt.close("all")
        # make sure kwargs to hist are handled
        axes = plotting.grouped_hist(df.A, by=df.C, normed=True, cumulative=True, bins=4)

        # height of last bin (index 5) must be 1.0
        for ax in axes.ravel():
            height = ax.get_children()[5].get_height()
            self.assertAlmostEqual(height, 1.0)

        plt.close("all")
        axes = plotting.grouped_hist(df.A, by=df.C, log=True)
        # scale of y must be 'log'
        for ax in axes.ravel():
            self.assert_(ax.get_yscale() == "log")

        plt.close("all")
        # propagate attr exception from matplotlib.Axes.hist
        self.assertRaises(AttributeError, plotting.grouped_hist, df.A, by=df.C, foo="bar")
Example #17
    def test_resample_axis1(self):
        rng = date_range("1/1/2000", "2/29/2000")
        df = DataFrame(np.random.randn(3, len(rng)), columns=rng, index=["a", "b", "c"])

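        # resampling across columns (axis=1) should match transposing, resampling the rows, and transposing back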
        result = df.resample("M", axis=1)
        expected = df.T.resample("M").T
        tm.assert_frame_equal(result, expected)
Example #18
    def test_frame_mixedtype_orient(self):  # GH10289
        vals = [
            [10, 1, "foo", 0.1, 0.01],
            [20, 2, "bar", 0.2, 0.02],
            [30, 3, "baz", 0.3, 0.03],
            [40, 4, "qux", 0.4, 0.04],
        ]

        df = DataFrame(vals, index=list("abcd"), columns=["1st", "2nd", "3rd", "4th", "5th"])

        self.assertTrue(df._is_mixed_type)
        right = df.copy()

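        # these orients keep the original index and columns when convert_axes=False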
        for orient in ["split", "index", "columns"]:
            inp = df.to_json(orient=orient)
            left = read_json(inp, orient=orient, convert_axes=False)
            assert_frame_equal(left, right)

        right.index = np.arange(len(df))
        inp = df.to_json(orient="records")
        left = read_json(inp, orient="records", convert_axes=False)
        assert_frame_equal(left, right)

        right.columns = np.arange(df.shape[1])
        inp = df.to_json(orient="values")
        left = read_json(inp, orient="values", convert_axes=False)
        assert_frame_equal(left, right)
Example #19
    def components(self):
        """
        Return a dataframe of the components (days, hours, minutes,
        seconds, milliseconds, microseconds, nanoseconds) of the Timedeltas.

        Returns
        -------
        a DataFrame
        """
        from pandas import DataFrame

        columns = ["days", "hours", "minutes", "seconds", "milliseconds", "microseconds", "nanoseconds"]
        hasnans = self.hasnans
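        # NaT entries produce a row of NaNs; otherwise use each Timedelta's components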
        if hasnans:

            def f(x):
                if isnull(x):
                    return [np.nan] * len(columns)
                return x.components

        else:

            def f(x):
                return x.components

        result = DataFrame([f(x) for x in self])
        result.columns = columns
        if not hasnans:
            result = result.astype("int64")
        return result
Example #20
    def test_v12_compat(self):
        df = DataFrame(
            [
                [1.56808523, 0.65727391, 1.81021139, -0.17251653],
                [-0.2550111, -0.08072427, -0.03202878, -0.17581665],
                [1.51493992, 0.11805825, 1.629455, -1.31506612],
                [-0.02765498, 0.44679743, 0.33192641, -0.27885413],
                [0.05951614, -2.69652057, 1.28163262, 0.34703478],
            ],
            columns=["A", "B", "C", "D"],
            index=pd.date_range("2000-01-03", "2000-01-07"),
        )
        df["date"] = pd.Timestamp("19920106 18:21:32.12")
        df.ix[3, "date"] = pd.Timestamp("20130101")
        df["modified"] = df["date"]
        df.ix[1, "modified"] = pd.NaT

        v12_json = os.path.join(self.dirpath, "tsframe_v012.json")
        df_unser = pd.read_json(v12_json)
        assert_frame_equal(df, df_unser)

        df_iso = df.drop(["modified"], axis=1)
        v12_iso_json = os.path.join(self.dirpath, "tsframe_iso_v012.json")
        df_unser_iso = pd.read_json(v12_iso_json)
        assert_frame_equal(df_iso, df_unser_iso)
Example #21
    def predict(self, prediction_data):
        df = DataFrame(prediction_data)
        ret = []
        for index, data in df.iterrows():
            ret.append(self.agg(data.tolist()))
        return ret
Example #22
    def test_reconstruction_index(self):

        df = DataFrame([[1, 2, 3], [4, 5, 6]])
        result = read_json(df.to_json())

        # the index is serialized as strings....correct?
        assert_frame_equal(result, df)
Example #23
    def test_time(self):
        import matplotlib.pyplot as plt

        plt.close("all")

        t = datetime(1, 1, 1, 3, 30, 0)
        deltas = np.random.randint(1, 20, 3).cumsum()
        ts = np.array([(t + timedelta(minutes=int(x))).time() for x in deltas])
        df = DataFrame({"a": np.random.randn(len(ts)), "b": np.random.randn(len(ts))}, index=ts)
        ax = df.plot()

        # verify tick labels
        ticks = ax.get_xticks()
        labels = ax.get_xticklabels()
        for t, l in zip(ticks, labels):
            m, s = divmod(int(t), 60)
            h, m = divmod(m, 60)
            xp = l.get_text()
            if len(xp) > 0:
                rs = time(h, m, s).strftime("%H:%M:%S")
                self.assertEqual(xp, rs)

        # change xlim
        ax.set_xlim("1:30", "5:00")

        # check tick labels again
        ticks = ax.get_xticks()
        labels = ax.get_xticklabels()
        for t, l in zip(ticks, labels):
            m, s = divmod(int(t), 60)
            h, m = divmod(m, 60)
            xp = l.get_text()
            if len(xp) > 0:
                rs = time(h, m, s).strftime("%H:%M:%S")
                self.assertEqual(xp, rs)
Example #24
    def test_nonzero_base(self):
        # GH2571
        idx = date_range("2012-12-20", periods=24, freq="H") + timedelta(minutes=30)
        df = DataFrame(np.arange(24), index=idx)
        ax = df.plot()
        rs = ax.get_lines()[0].get_xdata()
        self.assertFalse(Index(rs).is_normalized)
Example #25
    def test_get_numeric_data_preserve_dtype(self):

        # get the numeric data
        o = DataFrame({"A": [1, "2", 3.0]})
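        # the single column is object dtype (mixed int/str/float), so no numeric columns survive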
        result = o._get_numeric_data()
        expected = DataFrame(index=[0, 1, 2], dtype=object)
        self._compare(result, expected)
Example #26
    def test_axis_shared(self):
        # GH4089
        import matplotlib.pyplot as plt

        def tick_text(tl):
            return [x.get_text() for x in tl]

        n = 100
        df = DataFrame(
            {
                "gender": np.array(["Male", "Female"])[random.randint(2, size=n)],
                "height": random.normal(66, 4, size=n),
                "weight": random.normal(161, 32, size=n),
            }
        )
        ax1, ax2 = df.hist(column="height", by=df.gender, sharex=True)
        self.assert_(ax1._shared_x_axes.joined(ax1, ax2))
        self.assertFalse(ax1._shared_y_axes.joined(ax1, ax2))
        self.assert_(ax2._shared_x_axes.joined(ax1, ax2))
        self.assertFalse(ax2._shared_y_axes.joined(ax1, ax2))
        plt.close("all")

        ax1, ax2 = df.hist(column="height", by=df.gender, sharey=True)
        self.assertFalse(ax1._shared_x_axes.joined(ax1, ax2))
        self.assert_(ax1._shared_y_axes.joined(ax1, ax2))
        self.assertFalse(ax2._shared_x_axes.joined(ax1, ax2))
        self.assert_(ax2._shared_y_axes.joined(ax1, ax2))
        plt.close("all")

        ax1, ax2 = df.hist(column="height", by=df.gender, sharex=True, sharey=True)
        self.assert_(ax1._shared_x_axes.joined(ax1, ax2))
        self.assert_(ax1._shared_y_axes.joined(ax1, ax2))
        self.assert_(ax2._shared_x_axes.joined(ax1, ax2))
        self.assert_(ax2._shared_y_axes.joined(ax1, ax2))
Example #27
    def test_frame_reset_index(self):
        dr = date_range("2012-06-02", periods=10, tz=self.tzstr("US/Eastern"))
        df = DataFrame(np.random.randn(len(dr)), dr)
        roundtripped = df.reset_index().set_index("index")
        xp = df.index.tz
        rs = roundtripped.index.tz
        self.assertEqual(xp, rs)
Example #28
    def test_unsorted_index(self):
        df = DataFrame({"y": np.arange(100)}, index=np.arange(99, -1, -1))
        ax = df.plot()
        line = ax.get_lines()[0]
        rs = line.get_xydata()
        rs = Series(rs[:, 1], rs[:, 0], dtype=np.int64)
        tm.assert_series_equal(rs, df.y)
Example #29
    def test_legend_name(self):
        multi = DataFrame(randn(4, 4), columns=[np.array(["a", "a", "b", "b"]), np.array(["x", "y", "x", "y"])])
        multi.columns.names = ["group", "individual"]

        ax = multi.plot()
        leg_title = ax.legend_.get_title()
        self.assertEqual(leg_title.get_text(), "group,individual")
Example #30
    def test_delevel_infer_dtype(self):
        tuples = list(cart_product(["foo", "bar"], [10, 20], [1.0, 1.1]))
        index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"])
        df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"], index=index)
        deleveled = df.reset_index()
        self.assert_(com.is_integer_dtype(deleveled["prm1"]))
        self.assert_(com.is_float_dtype(deleveled["prm2"]))