Example no. 1
import pandas as pd
import tushare as ts

# fetch daily OHLC history (assumes `pro = ts.pro_api()` and `stock` are defined by the surrounding script)
target = pro.daily(ts_code=stock + ".SH")
target = target[["trade_date", "open", "close", "high", "low"]]
target["trade_date"] = pd.to_datetime(target["trade_date"], format="%Y%m%d")
target.index = target["trade_date"]
target.sort_index(ascending=True, inplace=True)
'''
today = ts.get_realtime_quotes(stock) #Single stock symbol
today["date"] = pd.to_datetime(today["date"], format="%Y-%m-%d")
today.index = today["date"]
# append the real-time quote to the historical data
hist.loc[today["date"].iloc[0]] = [today["date"].iloc[0],today["open"].iloc[0],today["price"].iloc[0]]
target.loc[today["date"].iloc[0]] = [today["date"].iloc[0],today["open"].iloc[0],today["price"].iloc[0]]
'''

p = pd.Period(start, freq="B") + 1
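# for reference: adding 1 to a business-day Period advances one business day,
# e.g. pd.Period("2024-01-05", freq="B") + 1 == pd.Period("2024-01-08", freq="B")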

csv = pd.DataFrame(columns=['交易类型', '交易价格', '持仓数量', '可用资金', '人民币总资产'])  # trade type, trade price, position size, available cash, total assets (CNY)


class Account:
    def __init__(self, capital_base, previous_date):
        self.capital_base = capital_base
        self.current_positions = []
        self.target_position = []
        self.stock_amounts = {}
        self.previous_date = pd.Period(previous_date, freq="B")
        self.cash = 10000

    def order_to(self, stock, amounts):
        print(1)  # stub
Example no. 2
    def proforma_report(self, apply_inflation_rate_func, fill_forward_func,
                        results):
        """ Calculates the proforma that corresponds to participation in this value stream

        Args:
            apply_inflation_rate_func: function that applies inflation rates to dollar values across years
            fill_forward_func: function that fills values forward between optimization years (escalating rates)
            results (pd.DataFrame): results of the optimization

        Returns: A DataFrame with each year in opt_year as the index and
            the corresponding value this stream provided.

        """
        pro_forma = super().proforma_report(apply_inflation_rate_func,
                                            fill_forward_func, results)
        tech_id = self.unique_tech_id()
        if self.variables_df.empty:
            return pro_forma
        optimization_years = self.variables_df.index.year.unique()

        # OM COSTS
        om_costs = pd.DataFrame()
        cumulative_energy_dispatch_kw = pd.DataFrame()
        elec = self.variables_df['elec']
        udis = self.variables_df['udis']
        dis_column_name = tech_id + ' Cumulative Energy Dispatch (kW)'
        variable_column_name = tech_id + ' Variable O&M Costs'
        for year in optimization_years:
            index_yr = pd.Period(year=year, freq='y')
            # add fixed o&m costs
            om_costs.loc[index_yr, self.fixed_column_name()] = -self.fixed_om
            # add variable costs
            elec_sub = elec.loc[elec.index.year == year]
            udis_sub = udis.loc[udis.index.year == year]
            om_costs.loc[index_yr, variable_column_name] = -self.variable_om
            cumulative_energy_dispatch_kw.loc[
                index_yr,
                dis_column_name] = np.sum(elec_sub) + np.sum(udis_sub)

        # fill forward (escalate rates)
        om_costs = fill_forward_func(om_costs, None, is_om_cost=True)

        # interpolate cumulative energy dispatch between analysis years
        #   be careful to not include years labeled as Strings (CAPEX)
        years_list = [yr for yr in om_costs.index if not isinstance(yr, str)]
        analysis_start_year = min(years_list).year
        analysis_end_year = max(years_list).year
        cumulative_energy_dispatch_kw = self.interpolate_energy_dispatch(
            cumulative_energy_dispatch_kw, analysis_start_year,
            analysis_end_year, None)
        # calculate om costs in dollars, as rate * energy
        # fixed om is already in $
        # variable om
        om_costs.loc[:, variable_column_name] = (
            om_costs.loc[:, variable_column_name]
            * self.dt
            * cumulative_energy_dispatch_kw.loc[:, dis_column_name]
        )
        # append with super class's proforma
        pro_forma = pd.concat([pro_forma, om_costs], axis=1)

        # fuel costs in $/kW
        fuel_costs = pd.DataFrame()
        fuel_col_name = tech_id + ' Fuel Costs'
        for year in optimization_years:
            elec_sub = elec.loc[elec.index.year == year]
            udis_sub = udis.loc[udis.index.year == year]
            # add fuel costs in $/kW
            fuel_costs.loc[pd.Period(year=year, freq='y'),
                           fuel_col_name] = -np.sum(
                               self.heat_rate * self.fuel_cost * self.dt *
                               (elec_sub + udis_sub))
        # fill forward
        fuel_costs = fill_forward_func(fuel_costs, None)
        # append with super class's proforma
        pro_forma = pd.concat([pro_forma, fuel_costs], axis=1)

        return pro_forma
Example no. 3
    def test_constructor_period_incompatible_frequency(self):
        data = [pd.Period('2000', 'D'), pd.Period('2001', 'A')]
        result = pd.Series(data)
        assert result.dtype == object
        assert result.tolist() == data
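For contrast, a minimal sketch (not part of the original test): when the frequencies match, pandas infers a period dtype instead of falling back to object.

import pandas as pd

data = [pd.Period('2000', 'D'), pd.Period('2001', 'D')]
result = pd.Series(data)
assert result.dtype == 'Period[D]'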
Example no. 4
def dashboard_sourceData_temp(request):
    if request.method == 'GET':
        json_out = {}
        main_out = {}
        days_num = 0
        data = {}

        try:
            # sourcedata
            days_list = []
            topic_list = Topic.objects
            day = pd.Period(datetime.datetime.now(),freq='D')
            # logger.info(type(Topic.objects))  # Topic.objects.all() and Topic.objects() return the same type
            datatype_list = Datatype_name.objects

            post_7days = Post.objects(
                Q(pt_time__gte=datetime.datetime.combine(
                    date.today() - datetime.timedelta(6), datetime.time.min)) &
                Q(pt_time__lte=datetime.datetime.combine(
                    date.today(), datetime.time.max)))

            while days_num < 7:
                day_dict = {}
                day_str = day.strftime('%Y%m%d')
                day_dict['time'] = day_str

                post = post_7days(
                    Q(pt_time__gte=datetime.datetime.combine(
                        date.today() - datetime.timedelta(days=days_num),
                        datetime.time.min)) &
                    Q(pt_time__lte=datetime.datetime.combine(
                        date.today() - datetime.timedelta(days=days_num),
                        datetime.time.max)))

                for topic in topic_list:
                    for data in datatype_list:
                        day_dict = {}
                        day_dict['time'] = day_str
                        day_dict['topic_id'] = topic._id
                        day_dict['topic_name'] = topic.topic_name
                        day_dict['dataTypeName'] = data.datatype_name
                        day_dict['data_type'] = data.data_type
                        post_datatype = post(Q(data_type=data.data_type) & Q(topic_id=topic._id))
                        # logger.info('post_num = ' + str(len(post_datatype)))
                        day_dict['post_num'] = post_datatype.count()
                        days_list.append(day_dict)


                # for data in datatype_list:
                #     day_dict['dataTypeName'] = data.datatype_name
                #     day_dict['data_type'] = data.data_type
                #     post_datatype = post(Q(data_type=data.data_type))
                #     # logger.info('post_num = ' + str(len(post_datatype)))
                #     day_dict['post_num'] = len(post_datatype)

                #     day_dict_ = dict(day_dict)
                #     days_list.append(day_dict_)

                day -= 1
                days_num += 1


            #######  Hot rankings for all posts
            hot_dict = {}
            hot_posts_temp = post_7days(Q(data_type=3))
            hot_posts = hot_posts_temp.order_by('-comm_num')[:10].only(
                "_id", "url", "board", "title", "content", "pt_time",
                "img_url", "comm_num", "repost_num")

            hot_weibo_temp = post_7days(Q(data_type=2))
            hot_weibo = hot_weibo_temp.order_by('-comm_num')[:10].only(
                "_id", "url", "board", "title", "content", "pt_time",
                "img_url", "comm_num", "repost_num")

            hot_poster = hot_posts_temp.order_by('-poster.post_num')[:10].only("poster")

            hot_weibouser_post_num = hot_weibo_temp.order_by('-poster.post_num').only("poster")
            hot_weibouser = []
            hot_weibouser_id = []
            num_ten = 0
            for item in hot_weibouser_post_num:
                if num_ten == 10:
                    break
                if item.poster.id not in hot_weibouser_id:
                    hot_weibouser.append(item)
                    hot_weibouser_id.append(item.poster.id)
                    num_ten += 1

            hot_dict['hotPost'] = handle_post_list1(hot_posts)
            hot_dict['hotPoster'] = handle_poster_list1(hot_poster)
            hot_dict['hotWeibo'] = handle_post_list1(hot_weibo)
            # hot_dict['hotPoster'] = handle_poster_list(hot_posts)

            hot_dict['hotWeiboUser'] = handle_weibouser_list1(hot_weibouser)


             #######  map data
            mapData_list = [
                {'id': '001', 'pro': "陕西", 'nums': 52},
                {'id': '002', 'pro': "北京", 'nums': 100},
                {'id': '003', 'pro': "上海", 'nums': 60},
                {'id': '004', 'pro': "杭州", 'nums': 48},
                {'id': '005', 'pro': "南京", 'nums': 50},
            ]

            wordlist = []
            wordres = Cloud_formain.objects(Q(topic_id=999)).only("word", "frequency")
            for worditem in wordres:
                temp = {}
                temp['word'] = worditem.word
                temp['weight'] = worditem.frequency
                wordlist.append(temp)

            main_out['mapData'] = mapData_list
            main_out['sourceData'] = days_list
            main_out['Hot'] = hot_dict
            main_out['word_cloud'] = wordlist

            json_out['code'] = 0
            json_out['success'] = True
            json_out['data'] = main_out
        except Exception:
            traceback.print_exc()
            json_out['code'] = 1
            json_out['data'] = {}
            json_out['success'] = False

        return HttpResponse(json.dumps(json_out, cls=MyEncoder),content_type="application/json")
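The seven-day loop above walks a daily Period backwards in time; a minimal standalone sketch of the idiom:

import datetime
import pandas as pd

day = pd.Period(datetime.datetime.now(), freq='D')
for _ in range(7):
    print(day.strftime('%Y%m%d'))  # e.g. '20240105'
    day -= 1  # Period arithmetic: step back one calendar day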
Example no. 5
    # Right now MultiIndex, by default, drops NA from levels when created via
    # `from_*`, so we need to add NA back to the level manually afterwards.
    if not dropna:
        mi = mi.set_levels(["A", "B", np.nan], level="b")
    expected = pd.DataFrame(outputs, index=mi)

    tm.assert_frame_equal(grouped, expected)


@pytest.mark.parametrize(
    "datetime1, datetime2",
    [
        (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-02-01")),
        (pd.Timedelta("-2 days"), pd.Timedelta("-1 days")),
        (pd.Period("2020-01-01"), pd.Period("2020-02-01")),
    ],
)
@pytest.mark.parametrize("dropna, values", [(True, [12, 3]),
                                            (False, [12, 3, 6])])
def test_groupby_dropna_datetime_like_data(dropna, values, datetime1,
                                           datetime2, unique_nulls_fixture,
                                           unique_nulls_fixture2):
    # 3729
    df = pd.DataFrame({
        "values": [1, 2, 3, 4, 5, 6],
        "dt": [
            datetime1,
            unique_nulls_fixture,
            datetime2,
            unique_nulls_fixture2,
Example no. 6
class TestSeriesReplace:
    def test_replace_explicit_none(self):
        # GH#36984 if the user explicitly passes value=None, give it to them
        ser = pd.Series([0, 0, ""], dtype=object)
        result = ser.replace("", None)
        expected = pd.Series([0, 0, None], dtype=object)
        tm.assert_series_equal(result, expected)

        df = pd.DataFrame(np.zeros((3, 3)))
        df.iloc[2, 2] = ""
        result = df.replace("", None)
        expected = pd.DataFrame(
            {
                0: np.zeros(3),
                1: np.zeros(3),
                2: np.array([0.0, 0.0, None], dtype=object),
            }
        )
        assert expected.iloc[2, 2] is None
        tm.assert_frame_equal(result, expected)

        # GH#19998 same thing with object dtype
        ser = pd.Series([10, 20, 30, "a", "a", "b", "a"])
        result = ser.replace("a", None)
        expected = pd.Series([10, 20, 30, None, None, "b", None])
        assert expected.iloc[-1] is None
        tm.assert_series_equal(result, expected)

    def test_replace_numpy_nan(self, nulls_fixture):
        # GH#45725 ensure numpy.nan can be replaced with all other null types
        to_replace = np.nan
        value = nulls_fixture
        dtype = object
        ser = pd.Series([to_replace], dtype=dtype)
        expected = pd.Series([value], dtype=dtype)

        result = ser.replace({to_replace: value}).astype(dtype=dtype)
        tm.assert_series_equal(result, expected)
        assert result.dtype == dtype

        # same thing but different calling convention
        result = ser.replace(to_replace, value).astype(dtype=dtype)
        tm.assert_series_equal(result, expected)
        assert result.dtype == dtype

    def test_replace_noop_doesnt_downcast(self):
        # GH#44498
        ser = pd.Series([None, None, pd.Timestamp("2021-12-16 17:31")], dtype=object)
        res = ser.replace({np.nan: None})  # should be a no-op
        tm.assert_series_equal(res, ser)
        assert res.dtype == object

        # same thing but different calling convention
        res = ser.replace(np.nan, None)
        tm.assert_series_equal(res, ser)
        assert res.dtype == object

    def test_replace(self):
        N = 100
        ser = pd.Series(np.random.randn(N))
        ser[0:4] = np.nan
        ser[6:10] = 0

        # replace list with a single value
        return_value = ser.replace([np.nan], -1, inplace=True)
        assert return_value is None

        exp = ser.fillna(-1)
        tm.assert_series_equal(ser, exp)

        rs = ser.replace(0.0, np.nan)
        ser[ser == 0.0] = np.nan
        tm.assert_series_equal(rs, ser)

        ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object)
        ser[:5] = np.nan
        ser[6:10] = "foo"
        ser[20:30] = "bar"

        # replace list with a single value
        rs = ser.replace([np.nan, "foo", "bar"], -1)

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -1).all()
        assert (rs[20:30] == -1).all()
        assert (pd.isna(ser[:5])).all()

        # replace with different values
        rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -2).all()
        assert (rs[20:30] == -3).all()
        assert (pd.isna(ser[:5])).all()

        # replace with different values with 2 lists
        rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
        tm.assert_series_equal(rs, rs2)

        # replace inplace
        return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True)
        assert return_value is None

        assert (ser[:5] == -1).all()
        assert (ser[6:10] == -1).all()
        assert (ser[20:30] == -1).all()

    def test_replace_nan_with_inf(self):
        ser = pd.Series([np.nan, 0, np.inf])
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))

        ser = pd.Series([np.nan, 0, "foo", "bar", np.inf, None, pd.NaT])
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
        filled = ser.copy()
        filled[4] = 0
        tm.assert_series_equal(ser.replace(np.inf, 0), filled)

    def test_replace_listlike_value_listlike_target(self, datetime_series):
        ser = pd.Series(datetime_series.index)
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))

        # malformed
        msg = r"Replacement lists must match in length\. Expecting 3 got 2"
        with pytest.raises(ValueError, match=msg):
            ser.replace([1, 2, 3], [np.nan, 0])

        # ser is dt64 so can't hold 1 or 2, so this replace is a no-op
        result = ser.replace([1, 2], [np.nan, 0])
        tm.assert_series_equal(result, ser)

        ser = pd.Series([0, 1, 2, 3, 4])
        result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])
        tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0]))

    def test_replace_gh5319(self):
        # API change from 0.12?
        # GH 5319
        ser = pd.Series([0, np.nan, 2, 3, 4])
        expected = ser.ffill()
        result = ser.replace([np.nan])
        tm.assert_series_equal(result, expected)

        ser = pd.Series([0, np.nan, 2, 3, 4])
        expected = ser.ffill()
        result = ser.replace(np.nan)
        tm.assert_series_equal(result, expected)

    def test_replace_datetime64(self):
        # GH 5797
        ser = pd.Series(pd.date_range("20130101", periods=5))
        expected = ser.copy()
        expected.loc[2] = pd.Timestamp("20120101")
        result = ser.replace({pd.Timestamp("20130103"): pd.Timestamp("20120101")})
        tm.assert_series_equal(result, expected)
        result = ser.replace(pd.Timestamp("20130103"), pd.Timestamp("20120101"))
        tm.assert_series_equal(result, expected)

    def test_replace_nat_with_tz(self):
        # GH 11792: Test with replacing NaT in a list with tz data
        ts = pd.Timestamp("2015/01/01", tz="UTC")
        s = pd.Series([pd.NaT, pd.Timestamp("2015/01/01", tz="UTC")])
        result = s.replace([np.nan, pd.NaT], pd.Timestamp.min)
        expected = pd.Series([pd.Timestamp.min, ts], dtype=object)
        tm.assert_series_equal(expected, result)

    def test_replace_timedelta_td64(self):
        tdi = pd.timedelta_range(0, periods=5)
        ser = pd.Series(tdi)

        # Using a single dict argument means we go through replace_list
        result = ser.replace({ser[1]: ser[3]})

        expected = pd.Series([ser[0], ser[3], ser[2], ser[3], ser[4]])
        tm.assert_series_equal(result, expected)

    def test_replace_with_single_list(self):
        ser = pd.Series([0, 1, 2, 3, 4])
        result = ser.replace([1, 2, 3])
        tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4]))

        s = ser.copy()
        return_value = s.replace([1, 2, 3], inplace=True)
        assert return_value is None
        tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4]))

        # make sure things don't get corrupted when fillna call fails
        s = ser.copy()
        msg = (
            r"Invalid fill method\. Expecting pad \(ffill\) or backfill "
            r"\(bfill\)\. Got crash_cymbal"
        )
        with pytest.raises(ValueError, match=msg):
            return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal")
            assert return_value is None
        tm.assert_series_equal(s, ser)

    def test_replace_mixed_types(self):
        ser = pd.Series(np.arange(5), dtype="int64")

        def check_replace(to_rep, val, expected):
            sc = ser.copy()
            result = ser.replace(to_rep, val)
            return_value = sc.replace(to_rep, val, inplace=True)
            assert return_value is None
            tm.assert_series_equal(expected, result)
            tm.assert_series_equal(expected, sc)

        # 3.0 can still be held in our int64 series, so we do not upcast GH#44940
        tr, v = [3], [3.0]
        check_replace(tr, v, ser)
        # Note this matches what we get with the scalars 3 and 3.0
        check_replace(tr[0], v[0], ser)

        # MUST upcast to float
        e = pd.Series([0, 1, 2, 3.5, 4])
        tr, v = [3], [3.5]
        check_replace(tr, v, e)

        # casts to object
        e = pd.Series([0, 1, 2, 3.5, "a"])
        tr, v = [3, 4], [3.5, "a"]
        check_replace(tr, v, e)

        # again casts to object
        e = pd.Series([0, 1, 2, 3.5, pd.Timestamp("20130101")])
        tr, v = [3, 4], [3.5, pd.Timestamp("20130101")]
        check_replace(tr, v, e)

        # casts to object
        e = pd.Series([0, 1, 2, 3.5, True], dtype="object")
        tr, v = [3, 4], [3.5, True]
        check_replace(tr, v, e)

        # test an object with dates + floats + integers + strings
        dr = pd.Series(pd.date_range("1/1/2001", "1/10/2001", freq="D"))
        result = dr.astype(object).replace([dr[0], dr[1], dr[2]], [1.0, 2, "a"])
        expected = pd.Series([1.0, 2, "a"] + dr[3:].tolist(), dtype=object)
        tm.assert_series_equal(result, expected)

    def test_replace_bool_with_string_no_op(self):
        s = pd.Series([True, False, True])
        result = s.replace("fun", "in-the-sun")
        tm.assert_series_equal(s, result)

    def test_replace_bool_with_string(self):
        # nonexistent elements
        s = pd.Series([True, False, True])
        result = s.replace(True, "2u")
        expected = pd.Series(["2u", False, "2u"])
        tm.assert_series_equal(expected, result)

    def test_replace_bool_with_bool(self):
        s = pd.Series([True, False, True])
        result = s.replace(True, False)
        expected = pd.Series([False] * len(s))
        tm.assert_series_equal(expected, result)

    def test_replace_with_dict_with_bool_keys(self):
        s = pd.Series([True, False, True])
        result = s.replace({"asdf": "asdb", True: "yes"})
        expected = pd.Series(["yes", False, "yes"])
        tm.assert_series_equal(result, expected)

    def test_replace_Int_with_na(self, any_int_ea_dtype):
        # GH 38267
        result = pd.Series([0, None], dtype=any_int_ea_dtype).replace(0, pd.NA)
        expected = pd.Series([pd.NA, pd.NA], dtype=any_int_ea_dtype)
        tm.assert_series_equal(result, expected)
        result = pd.Series([0, 1], dtype=any_int_ea_dtype).replace(0, pd.NA)
        result.replace(1, pd.NA, inplace=True)
        tm.assert_series_equal(result, expected)

    def test_replace2(self):
        N = 100
        ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object)
        ser[:5] = np.nan
        ser[6:10] = "foo"
        ser[20:30] = "bar"

        # replace list with a single value
        rs = ser.replace([np.nan, "foo", "bar"], -1)

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -1).all()
        assert (rs[20:30] == -1).all()
        assert (pd.isna(ser[:5])).all()

        # replace with different values
        rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -2).all()
        assert (rs[20:30] == -3).all()
        assert (pd.isna(ser[:5])).all()

        # replace with different values with 2 lists
        rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
        tm.assert_series_equal(rs, rs2)

        # replace inplace
        return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True)
        assert return_value is None
        assert (ser[:5] == -1).all()
        assert (ser[6:10] == -1).all()
        assert (ser[20:30] == -1).all()

    def test_replace_with_dictlike_and_string_dtype(self, nullable_string_dtype):
        # GH 32621, GH#44940
        ser = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype)
        expected = pd.Series(["1", "2", np.nan], dtype=nullable_string_dtype)
        result = ser.replace({"one": "1", "two": "2"})
        tm.assert_series_equal(expected, result)

    def test_replace_with_empty_dictlike(self):
        # GH 15289
        s = pd.Series(list("abcd"))
        tm.assert_series_equal(s, s.replace({}))

        with tm.assert_produces_warning(FutureWarning):
            empty_series = pd.Series([])
        tm.assert_series_equal(s, s.replace(empty_series))

    def test_replace_string_with_number(self):
        # GH 15743
        s = pd.Series([1, 2, 3])
        result = s.replace("2", np.nan)
        expected = pd.Series([1, 2, 3])
        tm.assert_series_equal(expected, result)

    def test_replace_replacer_equals_replacement(self):
        # GH 20656
        # make sure all replacers are matching against original values
        s = pd.Series(["a", "b"])
        expected = pd.Series(["b", "a"])
        result = s.replace({"a": "b", "b": "a"})
        tm.assert_series_equal(expected, result)

    def test_replace_unicode_with_number(self):
        # GH 15743
        s = pd.Series([1, 2, 3])
        result = s.replace("2", np.nan)
        expected = pd.Series([1, 2, 3])
        tm.assert_series_equal(expected, result)

    def test_replace_mixed_types_with_string(self):
        # Testing mixed
        s = pd.Series([1, 2, 3, "4", 4, 5])
        result = s.replace([2, "4"], np.nan)
        expected = pd.Series([1, np.nan, 3, np.nan, 4, 5])
        tm.assert_series_equal(expected, result)

    @pytest.mark.parametrize(
        "categorical, numeric",
        [
            (pd.Categorical(["A"], categories=["A", "B"]), [1]),
            (pd.Categorical(["A", "B"], categories=["A", "B"]), [1, 2]),
        ],
    )
    def test_replace_categorical(self, categorical, numeric):
        # GH 24971, GH#23305
        ser = pd.Series(categorical)
        result = ser.replace({"A": 1, "B": 2})
        expected = pd.Series(numeric).astype("category")
        if 2 not in expected.cat.categories:
            # i.e. categories should be [1, 2] even if there are no "B"s present
            # GH#44940
            expected = expected.cat.add_categories(2)
        tm.assert_series_equal(expected, result)

    def test_replace_categorical_single(self):
        # GH 26988
        dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
        s = pd.Series(dti)
        c = s.astype("category")

        expected = c.copy()
        expected = expected.cat.add_categories("foo")
        expected[2] = "foo"
        expected = expected.cat.remove_unused_categories()
        assert c[2] != "foo"

        result = c.replace(c[2], "foo")
        tm.assert_series_equal(expected, result)
        assert c[2] != "foo"  # ensure non-inplace call does not alter original

        return_value = c.replace(c[2], "foo", inplace=True)
        assert return_value is None
        tm.assert_series_equal(expected, c)

        first_value = c[0]
        return_value = c.replace(c[1], c[0], inplace=True)
        assert return_value is None
        assert c[0] == c[1] == first_value  # test replacing with existing value

    def test_replace_with_no_overflowerror(self):
        # GH 25616
        # casts to object without Exception from OverflowError
        s = pd.Series([0, 1, 2, 3, 4])
        result = s.replace([3], ["100000000000000000000"])
        expected = pd.Series([0, 1, 2, "100000000000000000000", 4])
        tm.assert_series_equal(result, expected)

        s = pd.Series([0, "100000000000000000000", "100000000000000000001"])
        result = s.replace(["100000000000000000000"], [1])
        expected = pd.Series([0, 1, "100000000000000000001"])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "ser, to_replace, exp",
        [
            ([1, 2, 3], {1: 2, 2: 3, 3: 4}, [2, 3, 4]),
            (["1", "2", "3"], {"1": "2", "2": "3", "3": "4"}, ["2", "3", "4"]),
        ],
    )
    def test_replace_commutative(self, ser, to_replace, exp):
        # GH 16051
        # DataFrame.replace() overwrites when values are non-numeric

        series = pd.Series(ser)

        expected = pd.Series(exp)
        result = series.replace(to_replace)

        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "ser, exp", [([1, 2, 3], [1, True, 3]), (["x", 2, 3], ["x", True, 3])]
    )
    def test_replace_no_cast(self, ser, exp):
        # GH 9113
        # BUG: replace int64 dtype with bool coerces to int64

        series = pd.Series(ser)
        result = series.replace(2, True)
        expected = pd.Series(exp)

        tm.assert_series_equal(result, expected)

    def test_replace_invalid_to_replace(self):
        # GH 18634
        # API: replace() should raise an exception if invalid argument is given
        series = pd.Series(["a", "b", "c "])
        msg = (
            r"Expecting 'to_replace' to be either a scalar, array-like, "
            r"dict or None, got invalid type.*"
        )
        with pytest.raises(TypeError, match=msg):
            series.replace(lambda x: x.strip())

    @pytest.mark.parametrize("frame", [False, True])
    def test_replace_nonbool_regex(self, frame):
        obj = pd.Series(["a", "b", "c "])
        if frame:
            obj = obj.to_frame()

        msg = "'to_replace' must be 'None' if 'regex' is not a bool"
        with pytest.raises(ValueError, match=msg):
            obj.replace(to_replace=["a"], regex="foo")

    @pytest.mark.parametrize("frame", [False, True])
    def test_replace_empty_copy(self, frame):
        obj = pd.Series([], dtype=np.float64)
        if frame:
            obj = obj.to_frame()

        res = obj.replace(4, 5, inplace=True)
        assert res is None

        res = obj.replace(4, 5, inplace=False)
        tm.assert_equal(res, obj)
        assert res is not obj

    def test_replace_only_one_dictlike_arg(self, fixed_now_ts):
        # GH#33340

        ser = pd.Series([1, 2, "A", fixed_now_ts, True])
        to_replace = {0: 1, 2: "A"}
        value = "foo"
        msg = "Series.replace cannot use dict-like to_replace and non-None value"
        with pytest.raises(ValueError, match=msg):
            ser.replace(to_replace, value)

        to_replace = 1
        value = {0: "foo", 2: "bar"}
        msg = "Series.replace cannot use dict-value and non-None to_replace"
        with pytest.raises(ValueError, match=msg):
            ser.replace(to_replace, value)

    def test_replace_extension_other(self, frame_or_series):
        # https://github.com/pandas-dev/pandas/issues/34530
        obj = frame_or_series(pd.array([1, 2, 3], dtype="Int64"))
        result = obj.replace("", "")  # no exception
        # should not have changed dtype
        tm.assert_equal(obj, result)

    def _check_replace_with_method(self, ser: pd.Series):
        df = ser.to_frame()

        res = ser.replace(ser[1], method="pad")
        expected = pd.Series([ser[0], ser[0]] + list(ser[2:]), dtype=ser.dtype)
        tm.assert_series_equal(res, expected)

        res_df = df.replace(ser[1], method="pad")
        tm.assert_frame_equal(res_df, expected.to_frame())

        ser2 = ser.copy()
        res2 = ser2.replace(ser[1], method="pad", inplace=True)
        assert res2 is None
        tm.assert_series_equal(ser2, expected)

        res_df2 = df.replace(ser[1], method="pad", inplace=True)
        assert res_df2 is None
        tm.assert_frame_equal(df, expected.to_frame())

    def test_replace_ea_dtype_with_method(self, any_numeric_ea_dtype):
        arr = pd.array([1, 2, pd.NA, 4], dtype=any_numeric_ea_dtype)
        ser = pd.Series(arr)

        self._check_replace_with_method(ser)

    @pytest.mark.parametrize("as_categorical", [True, False])
    def test_replace_interval_with_method(self, as_categorical):
        # in particular interval that can't hold NA

        idx = pd.IntervalIndex.from_breaks(range(4))
        ser = pd.Series(idx)
        if as_categorical:
            ser = ser.astype("category")

        self._check_replace_with_method(ser)

    @pytest.mark.parametrize("as_period", [True, False])
    @pytest.mark.parametrize("as_categorical", [True, False])
    def test_replace_datetimelike_with_method(self, as_period, as_categorical):
        idx = pd.date_range("2016-01-01", periods=5, tz="US/Pacific")
        if as_period:
            idx = idx.tz_localize(None).to_period("D")

        ser = pd.Series(idx)
        ser.iloc[-2] = pd.NaT
        if as_categorical:
            ser = ser.astype("category")

        self._check_replace_with_method(ser)

    def test_replace_with_compiled_regex(self):
        # https://github.com/pandas-dev/pandas/issues/35680
        s = pd.Series(["a", "b", "c"])
        regex = re.compile("^a$")
        result = s.replace({regex: "z"}, regex=True)
        expected = pd.Series(["z", "b", "c"])
        tm.assert_series_equal(result, expected)

    def test_pandas_replace_na(self):
        # GH#43344
        ser = pd.Series(["AA", "BB", "CC", "DD", "EE", "", pd.NA], dtype="string")
        regex_mapping = {
            "AA": "CC",
            "BB": "CC",
            "EE": "CC",
            "CC": "CC-REPL",
        }
        result = ser.replace(regex_mapping, regex=True)
        exp = pd.Series(["CC", "CC", "CC-REPL", "DD", "CC", "", pd.NA], dtype="string")
        tm.assert_series_equal(result, exp)

    @pytest.mark.parametrize(
        "dtype, input_data, to_replace, expected_data",
        [
            ("bool", [True, False], {True: False}, [False, False]),
            ("int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
            ("Int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
            ("float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
            ("Float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
            ("string", ["one", "two"], {"one": "1", "two": "2"}, ["1", "2"]),
            (
                pd.IntervalDtype("int64"),
                IntervalArray([pd.Interval(1, 2), pd.Interval(2, 3)]),
                {pd.Interval(1, 2): pd.Interval(10, 20)},
                IntervalArray([pd.Interval(10, 20), pd.Interval(2, 3)]),
            ),
            (
                pd.IntervalDtype("float64"),
                IntervalArray([pd.Interval(1.0, 2.7), pd.Interval(2.8, 3.1)]),
                {pd.Interval(1.0, 2.7): pd.Interval(10.6, 20.8)},
                IntervalArray([pd.Interval(10.6, 20.8), pd.Interval(2.8, 3.1)]),
            ),
            (
                pd.PeriodDtype("M"),
                [pd.Period("2020-05", freq="M")],
                {pd.Period("2020-05", freq="M"): pd.Period("2020-06", freq="M")},
                [pd.Period("2020-06", freq="M")],
            ),
        ],
    )
    def test_replace_dtype(self, dtype, input_data, to_replace, expected_data):
        # GH#33484
        ser = pd.Series(input_data, dtype=dtype)
        result = ser.replace(to_replace)
        expected = pd.Series(expected_data, dtype=dtype)
        tm.assert_series_equal(result, expected)

    def test_replace_string_dtype(self):
        # GH#40732, GH#44940
        ser = pd.Series(["one", "two", np.nan], dtype="string")
        res = ser.replace({"one": "1", "two": "2"})
        expected = pd.Series(["1", "2", np.nan], dtype="string")
        tm.assert_series_equal(res, expected)

        # GH#31644
        ser2 = pd.Series(["A", np.nan], dtype="string")
        res2 = ser2.replace("A", "B")
        expected2 = pd.Series(["B", np.nan], dtype="string")
        tm.assert_series_equal(res2, expected2)

        ser3 = pd.Series(["A", "B"], dtype="string")
        res3 = ser3.replace("A", pd.NA)
        expected3 = pd.Series([pd.NA, "B"], dtype="string")
        tm.assert_series_equal(res3, expected3)

    def test_replace_string_dtype_list_to_replace(self):
        # GH#41215, GH#44940
        ser = pd.Series(["abc", "def"], dtype="string")
        res = ser.replace(["abc", "any other string"], "xyz")
        expected = pd.Series(["xyz", "def"], dtype="string")
        tm.assert_series_equal(res, expected)

    def test_replace_string_dtype_regex(self):
        # GH#31644
        ser = pd.Series(["A", "B"], dtype="string")
        res = ser.replace(r".", "C", regex=True)
        expected = pd.Series(["C", "C"], dtype="string")
        tm.assert_series_equal(res, expected)

    def test_replace_nullable_numeric(self):
        # GH#40732, GH#44940

        floats = pd.Series([1.0, 2.0, 3.999, 4.4], dtype=pd.Float64Dtype())
        assert floats.replace({1.0: 9}).dtype == floats.dtype
        assert floats.replace(1.0, 9).dtype == floats.dtype
        assert floats.replace({1.0: 9.0}).dtype == floats.dtype
        assert floats.replace(1.0, 9.0).dtype == floats.dtype

        res = floats.replace(to_replace=[1.0, 2.0], value=[9.0, 10.0])
        assert res.dtype == floats.dtype

        ints = pd.Series([1, 2, 3, 4], dtype=pd.Int64Dtype())
        assert ints.replace({1: 9}).dtype == ints.dtype
        assert ints.replace(1, 9).dtype == ints.dtype
        assert ints.replace({1: 9.0}).dtype == ints.dtype
        assert ints.replace(1, 9.0).dtype == ints.dtype
        # FIXME: ints.replace({1: 9.5}) raises bc of incorrect _can_hold_element

    @pytest.mark.parametrize("regex", [False, True])
    def test_replace_regex_dtype_series(self, regex):
        # GH-48644
        series = pd.Series(["0"])
        expected = pd.Series([1])
        result = series.replace(to_replace="0", value=1, regex=regex)
        tm.assert_series_equal(result, expected)

    def test_replace_different_int_types(self, any_int_numpy_dtype):
        # GH#45311
        labs = pd.Series([1, 1, 1, 0, 0, 2, 2, 2], dtype=any_int_numpy_dtype)

        maps = pd.Series([0, 2, 1], dtype=any_int_numpy_dtype)
        map_dict = {old: new for (old, new) in zip(maps.values, maps.index)}

        result = labs.replace(map_dict)
        expected = labs.replace({0: 0, 2: 1, 1: 2})
        tm.assert_series_equal(result, expected)
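The PeriodDtype case from test_replace_dtype above, written out as a standalone sketch:

import pandas as pd

ser = pd.Series([pd.Period('2020-05', freq='M')])
result = ser.replace({pd.Period('2020-05', freq='M'): pd.Period('2020-06', freq='M')})
assert result.dtype == 'Period[M]'
assert result[0] == pd.Period('2020-06', freq='M')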
Example no. 7
class TestInsertIndexCoercion(CoercionBase):

    klasses = ["index"]
    method = "insert"

    def _assert_insert_conversion(self, original, value, expected,
                                  expected_dtype):
        """test coercion triggered by insert"""
        target = original.copy()
        res = target.insert(1, value)
        tm.assert_index_equal(res, expected)
        assert res.dtype == expected_dtype

    @pytest.mark.parametrize(
        "insert, coerced_val, coerced_dtype",
        [
            (1, 1, object),
            (1.1, 1.1, object),
            (False, False, object),
            ("x", "x", object),
        ],
    )
    def test_insert_index_object(self, insert, coerced_val, coerced_dtype):
        obj = pd.Index(list("abcd"))
        assert obj.dtype == object

        exp = pd.Index(["a", coerced_val, "b", "c", "d"])
        self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

    @pytest.mark.parametrize(
        "insert, coerced_val, coerced_dtype",
        [
            (1, 1, np.int64),
            (1.1, 1.1, np.float64),
            (False, False, object),  # GH#36319
            ("x", "x", object),
        ],
    )
    def test_insert_index_int64(self, insert, coerced_val, coerced_dtype):
        obj = Int64Index([1, 2, 3, 4])
        assert obj.dtype == np.int64

        exp = pd.Index([1, coerced_val, 2, 3, 4])
        self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

    @pytest.mark.parametrize(
        "insert, coerced_val, coerced_dtype",
        [
            (1, 1.0, np.float64),
            (1.1, 1.1, np.float64),
            (False, False, object),  # GH#36319
            ("x", "x", object),
        ],
    )
    def test_insert_index_float64(self, insert, coerced_val, coerced_dtype):
        obj = Float64Index([1.0, 2.0, 3.0, 4.0])
        assert obj.dtype == np.float64

        exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0])
        self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

    @pytest.mark.parametrize(
        "fill_val,exp_dtype",
        [
            (pd.Timestamp("2012-01-01"), "datetime64[ns]"),
            (pd.Timestamp("2012-01-01",
                          tz="US/Eastern"), "datetime64[ns, US/Eastern]"),
        ],
        ids=["datetime64", "datetime64tz"],
    )
    @pytest.mark.parametrize(
        "insert_value",
        [
            pd.Timestamp("2012-01-01"),
            pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), 1
        ],
    )
    def test_insert_index_datetimes(self, request, fill_val, exp_dtype,
                                    insert_value):

        obj = pd.DatetimeIndex(
            ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"],
            tz=fill_val.tz)
        assert obj.dtype == exp_dtype

        exp = pd.DatetimeIndex(
            [
                "2011-01-01",
                fill_val.date(), "2011-01-02", "2011-01-03", "2011-01-04"
            ],
            tz=fill_val.tz,
        )
        self._assert_insert_conversion(obj, fill_val, exp, exp_dtype)

        if fill_val.tz:

            # mismatched tzawareness
            ts = pd.Timestamp("2012-01-01")
            result = obj.insert(1, ts)
            expected = obj.astype(object).insert(1, ts)
            assert expected.dtype == object
            tm.assert_index_equal(result, expected)

            # mismatched tz --> cast to object (could reasonably cast to common tz)
            ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo")
            with tm.assert_produces_warning(FutureWarning,
                                            match="mismatched timezone"):
                result = obj.insert(1, ts)
            # once deprecation is enforced:
            # expected = obj.insert(1, ts.tz_convert(obj.dtype.tz))
            # assert expected.dtype == obj.dtype
            expected = obj.astype(object).insert(1, ts)
            tm.assert_index_equal(result, expected)

        else:
            # mismatched tzawareness
            ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo")
            result = obj.insert(1, ts)
            expected = obj.astype(object).insert(1, ts)
            assert expected.dtype == object
            tm.assert_index_equal(result, expected)

        item = 1
        result = obj.insert(1, item)
        expected = obj.astype(object).insert(1, item)
        assert expected[1] == item
        assert expected.dtype == object
        tm.assert_index_equal(result, expected)

    def test_insert_index_timedelta64(self):
        obj = pd.TimedeltaIndex(["1 day", "2 day", "3 day", "4 day"])
        assert obj.dtype == "timedelta64[ns]"

        # timedelta64 + timedelta64 => timedelta64
        exp = pd.TimedeltaIndex(["1 day", "10 day", "2 day", "3 day", "4 day"])
        self._assert_insert_conversion(obj, pd.Timedelta("10 day"), exp,
                                       "timedelta64[ns]")

        for item in [pd.Timestamp("2012-01-01"), 1]:
            result = obj.insert(1, item)
            expected = obj.astype(object).insert(1, item)
            assert expected.dtype == object
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "insert, coerced_val, coerced_dtype",
        [
            (pd.Period("2012-01", freq="M"), "2012-01", "period[M]"),
            (pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01"), object),
            (1, 1, object),
            ("x", "x", object),
        ],
    )
    def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
        obj = pd.PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"],
                             freq="M")
        assert obj.dtype == "period[M]"

        data = [
            pd.Period("2011-01", freq="M"),
            coerced_val,
            pd.Period("2011-02", freq="M"),
            pd.Period("2011-03", freq="M"),
            pd.Period("2011-04", freq="M"),
        ]
        if isinstance(insert, pd.Period):
            exp = pd.PeriodIndex(data, freq="M")
            self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

            # string that can be parsed to appropriate PeriodDtype
            self._assert_insert_conversion(obj, str(insert), exp,
                                           coerced_dtype)

        else:
            result = obj.insert(0, insert)
            expected = obj.astype(object).insert(0, insert)
            tm.assert_index_equal(result, expected)

            # TODO: ATM inserting '2012-01-01 00:00:00' when we have obj.freq=="M"
            #  casts that string to Period[M], not clear that is desirable
            if not isinstance(insert, pd.Timestamp):
                # non-castable string
                result = obj.insert(0, str(insert))
                expected = obj.astype(object).insert(0, str(insert))
                tm.assert_index_equal(result, expected)

            msg = r"Unexpected keyword arguments {'freq'}"
            with pytest.raises(TypeError, match=msg):
                with tm.assert_produces_warning(FutureWarning):
                    # passing keywords to pd.Index
                    pd.Index(data, freq="M")

    @pytest.mark.xfail(reason="Test not implemented")
    def test_insert_index_complex128(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_insert_index_bool(self):
        raise NotImplementedError
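A condensed sketch of the Period-insert coercion behavior exercised above: inserting a matching-frequency Period preserves the period dtype, while anything else coerces to object.

import pandas as pd

pi = pd.PeriodIndex(['2011-01', '2011-02'], freq='M')
same = pi.insert(1, pd.Period('2012-01', freq='M'))  # dtype stays period[M]
mixed = pi.insert(1, pd.Timestamp('2012-01-01'))     # coerces to object
assert same.dtype == 'period[M]'
assert mixed.dtype == object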
Example no. 8
    assert result is array


def test_array_multiindex_raises():
    idx = pd.MultiIndex.from_product([['A'], ['a', 'b']])
    with pytest.raises(ValueError, match='MultiIndex'):
        idx.array


@pytest.mark.parametrize(
    'array, expected',
    [
        (np.array([1, 2], dtype=np.int64), np.array([1, 2], dtype=np.int64)),
        (pd.Categorical(['a', 'b']), np.array(['a', 'b'], dtype=object)),
        (pd.core.arrays.period_array(['2000', '2001'], freq='D'),
         np.array([pd.Period('2000', freq="D"),
                   pd.Period('2001', freq='D')])),
        (pd.core.arrays.integer_array(
            [0, np.nan]), np.array([0, np.nan], dtype=object)),
        (pd.core.arrays.IntervalArray.from_breaks([0, 1, 2]),
         np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object)),
        (pd.SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)),
        # TODO: DatetimeArray(add)
    ])
@pytest.mark.parametrize('box', [pd.Series, pd.Index])
def test_to_numpy(array, expected, box):
    thing = box(array)

    if array.dtype.name in ('Int64', 'Sparse[int64, 0]') and box is pd.Index:
        pytest.skip("No index type for {}".format(array.dtype))
Example no. 9
def main():
    #read in data
    indPath = "Data/30_Industry_Portfolios.CSV"
    rfPath = "Data/rf.csv"
    begDate = "195912"
    endDate = "201612"
    (ind, rf) = loadData(indPath, rfPath, begDate, endDate)
    exsReturns = excessReturns(ind, rf)
    nrow = rf.count()[0]

    #create summary table (table 1 in paper)
    sumTable = summaryStat(exsReturns.iloc[:, 1:])  # no date

    #OLS post Lasso with entire time period
    indNames = list(exsReturns.iloc[:, 1:])
    df = exsReturns  #with date
    (inter, fullPeriodResult) = OLSlassoRegression(df)  # use aic

    # expanding period L/S portfolio construction
    startRow = 0
    endRow = df.loc[df["Date"] == 196912].index[0]
    # start before the first prediction date (paper uses 196912)
    lastRow = df.loc[df["Date"] == 201612].index[0]
    periodR = pd.DataFrame(np.zeros(lastRow - endRow))
    # start with first predicting row
    dateIndex = pd.period_range(
        start=pd.Period(str(df.loc[endRow + 1, "Date"]), freq="M"),
        end=pd.Period(str(df.loc[lastRow, "Date"]), freq="M"),
        freq="M")
    indBetaList = [
        pd.DataFrame(np.zeros((lastRow - endRow, len(indNames))),
                     index=dateIndex,
                     columns=indNames) for i in range(len(indNames))
    ]
    for e in range(endRow, lastRow):
        #change this to OLSlassoRegression(df, endRow = e, mode="predict")
        (yPred, betas) = OLSlassoRegression(df, endRow=e, mode="predict")

        yPred.sort_values(by=["yPred"], ascending=True, inplace=True)
        #print("yPred    = ", yPred)
        #after sorted returns, long top quintile, and short bottom quintile
        bottomInd = yPred.iloc[:5, :].index  #find the industries
        topInd = yPred.iloc[-5:, :].index

        bottomR = df.loc[endRow + 1, bottomInd]  #get the realized returns
        topR = df.loc[endRow + 1, topInd]
        print(indBetaList[0].index[e - endRow], np.round(np.average(topR)),
              np.round(np.average(bottomR)),
              np.round(np.average(topR) - np.average(bottomR)))
        periodR.iloc[e - endRow, :] = np.mean(topR) - np.mean(bottomR)

        for i in range(len(indNames)):
            indBeta = indBetaList[i]
            indBeta.iloc[e - endRow, :] = betas.loc[betas.index[i], :]
    print(np.mean(periodR) * 12)
    # print(indBeta)

    # for i in range(len(indNames)):
    # 	indBeta = indBetaList[i]
    # 	writer = pd.ExcelWriter(indNames[i] + " betas over time.xlsx")
    # 	indBeta.to_excel(writer, "Sheet1")
    # 	writer.save()
    # 	lineplot(indBeta.index, indBeta, "Date", "OLS post Lasso Coefficient", indNames[i] + " Betas Over Time")
    # indBeta = indBetaList[0]
    # writer = pd.ExcelWriter(indNames[0] + " betas over time.xlsx")
    # indBeta.to_excel(writer, "Sheet1")
    # writer.save()

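    # note: `indBeta` below still refers to the last DataFrame assigned in the loop above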
    lineplot(indBeta.index, indBeta, "Date", "OLS post Lasso Coefficient",
             indNames[0] + " Betas Over Time")
Example no. 10
'X4_Mon_logerror6std',
'X4_Mon_logerror3mean',
'X1_Mon_logerror6mean']

x = train.yrmonth
train = train[cols]
y = joblib.load("../input/y.pkl")
#################################################
# Val Split
#################################################
#x = pd.read_csv('../input/train_2016_v2.csv')
#x["transactiondate"] = pd.to_datetime(x["transactiondate"])
#x["yrmonth"] = x["transactiondate"].apply(lambda x: x.strftime('%Y%m')).astype(int)  

y_logit = x
valindex = y_logit > pd.Period('2017-05')
trainindex = y_logit <= pd.Period('2017-05')
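# note: this split assumes `x` (train.yrmonth) holds monthly Periods comparable to pd.Period('2017-05')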
valid = train[valindex]
yval = y[valindex]
#train = train[trainindex]#
#y = y[trainindex]
#################################################

lbound = np.mean(y) - 3 * np.std(y)
ubound = np.mean(y) + 3 * np.std(y)

test = joblib.load("../input/teststat.pkl")
test = test[cols]
#test = valid.copy()

gc.collect()
Example no. 11
    ('decimal', [Decimal(1), np.nan, Decimal(2)]),
    ('boolean', [True, np.nan, False]),
    ('datetime64',
     [np.datetime64('2013-01-01'), np.nan,
      np.datetime64('2018-01-01')]),
    ('datetime', [pd.Timestamp('20130101'), np.nan,
                  pd.Timestamp('20180101')]),
    ('date', [date(2013, 1, 1), np.nan,
              date(2018, 1, 1)]),
    # The following two dtypes are commented out due to GH 23554
    # ('complex', [1 + 1j, np.nan, 2 + 2j]),
    # ('timedelta64', [np.timedelta64(1, 'D'),
    #                  np.nan, np.timedelta64(2, 'D')]),
    ('timedelta', [timedelta(1), np.nan, timedelta(2)]),
    ('time', [time(1), np.nan, time(2)]),
    ('period', [pd.Period(2013), pd.NaT,
                pd.Period(2018)]),
    ('interval', [pd.Interval(0, 1), np.nan,
                  pd.Interval(0, 2)])
]
ids, _ = zip(*_any_skipna_inferred_dtype)  # use inferred type as fixture-id


@pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids)
def any_skipna_inferred_dtype(request):
    """
    Fixture for all inferred dtypes from _libs.lib.infer_dtype

    The covered (inferred) types are:
    * 'string'
    * 'empty'
Example no. 12
    def test_pandas_period_index(self):
        v = self.cls(['x'], pd.period_range(start='2000', periods=20,
                                            freq='B'))
        self.assertEqual(v[0], pd.Period('2000', freq='B'))
        assert "Period('2000-01-03', 'B')" in repr(v)
Example no. 13
import itertools

import numpy as np
import pandas as pd
import pytest
from hamcrest import assert_that, none, not_none, calling, raises, close_to

import cifrum as lib
from conftest import decimal_places, delta
from cifrum._portfolio.currency import PortfolioCurrencyFactory
from cifrum.common.enums import Currency

__end_period = pd.Period('2018-12', freq='M')


@pytest.fixture
def pcf():
    return lib.obj_graph.provide(PortfolioCurrencyFactory)


@pytest.mark.parametrize(
    'currency, inflation_kind',
    itertools.product(Currency, ['values', 'cumulative', 'a_mean', 'g_mean']))
def test__exists_for_all_currencies(pcf: PortfolioCurrencyFactory,
                                    currency: Currency, inflation_kind: str):
    pc = pcf.new(currency=currency)
    infl = pc.inflation(kind=inflation_kind,
                        end_period=__end_period,
                        years_ago=4)
    assert_that(infl, not_none())
Example no. 14
def net_time_period_to_pandas_period(net_time_period, freq):
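    """Convert a .NET time period to a pandas Period of the given freq, using its Start datetime."""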
    start_datetime = net_datetime_to_py_datetime(net_time_period.Start)
    return pd.Period(start_datetime, freq=freq)
Example no. 15
def export_dataset(db, dataset):
    """Export all series for one Dataset
    
    Return an array - one line per series
    """
    # TODO: use a Redis queue, since too much is held in RAM?

    start = time.time()

    headers = ['key'] + dataset['dimension_keys']
    #['key', 'freq', 'geo', 'na_item', 'nace_r2', 'unit']

    # equivalent to 0 and -1?
    dmin = float('inf')
    dmax = -float('inf')

    query = {
        'provider_name': dataset['provider_name'],
        "dataset_code": dataset['dataset_code']
    }
    series_list = db[constants.COL_SERIES].find(query)

    for s in series_list:
        # collect the earliest and latest dates found
        """
        Gives a date range from the oldest date to the most recent,
        since the individual series do not all cover the same dates
        """
        if s['start_date'] < dmin:
            dmin = s['start_date']
        if s['end_date'] > dmax:
            dmax = s['end_date']
        freq = s['frequency']

    series_list.rewind()

    pDmin = pandas.Period(ordinal=dmin, freq=freq)
    pDmax = pandas.Period(ordinal=dmax, freq=freq)
    headers += list(
        pandas.period_range(pDmin, pDmax, freq=freq).to_native_types())
    #['key', 'freq', 'geo', 'na_item', 'nace_r2', 'unit', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014']

    elements = [headers]

    series_list.rewind()

    def row_process(s):
        row = [s['key']]

        for c in dataset['dimension_keys']:
            if c in s['dimensions']:
                row.append(s['dimensions'][c])
            else:
                row.append('')

        p_start_date = pandas.Period(ordinal=s['start_date'], freq=freq)
        p_end_date = pandas.Period(ordinal=s['end_date'], freq=freq)
        """
        pDmin : pandas.Period() la plus ancienne
        p_start_date-1 : périod en cours -1
            >>> p_start_date -1
            Period('1994', 'A-DEC')
            Bug: ne renvoi rien si
                p_start_date -1 devient identique à pDmin
        """

        # the Nones stand in for periods that have no corresponding value
        _row = [
            None
            for d in pandas.period_range(pDmin, p_start_date - 1, freq=freq)
        ]
        row.extend(_row)

        _row = [val["value"] for val in s['values']]
        row.extend(_row)

        _row = [
            None for d in pandas.period_range(p_end_date + 1, pDmax, freq=freq)
        ]
        row.extend(_row)

        return row

    for s in series_list:
        elements.append(row_process(s))

    end = time.time() - start
    logger.info("export_dataset - %s : %.3f" % (dataset['dataset_code'], end))

    return elements
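For reference, a Period's integer ordinal round-trips through the constructor, which is what the dmin/dmax bookkeeping above relies on:

import pandas

p = pandas.Period('2014', freq='A')
assert pandas.Period(ordinal=p.ordinal, freq='A') == p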
Example no. 16
    def test_is_period(self):
        self.assertTrue(lib.is_period(pd.Period('2011-01', freq='M')))
        self.assertFalse(lib.is_period(pd.PeriodIndex(['2011-01'], freq='M')))
        self.assertFalse(lib.is_period(pd.Timestamp('2011-01')))
        self.assertFalse(lib.is_period(1))
        self.assertFalse(lib.is_period(np.nan))
Example no. 17
        marks=not_implemented_mark,
    ),
    pytest.param(
        (pd.DataFrame, frame_data,
         operator.methodcaller("quantile", q=[0.25, 0.75])),
        marks=not_implemented_mark,
    ),
    pytest.param(
        (pd.DataFrame, frame_data, operator.methodcaller("quantile")),
        marks=not_implemented_mark,
    ),
    (
        pd.DataFrame,
        ({"A": [1]}, [pd.Period("2000", "D")]),
        operator.methodcaller("to_timestamp"),
    ),
    (
        pd.DataFrame,
        ({"A": [1]}, [pd.Timestamp("2000")]),
        operator.methodcaller("to_period", freq="D"),
    ),
    pytest.param(
        (pd.DataFrame, frame_mi_data, operator.methodcaller("isin", [1])),
        marks=not_implemented_mark,
    ),
    pytest.param(
        (pd.DataFrame, frame_mi_data,
Esempio n. 18
0
    def test_count_nonnumeric_types(self):
        # GH12541
        cols = [
            "int",
            "float",
            "string",
            "datetime",
            "timedelta",
            "periods",
            "fl_inf",
            "fl_nan",
            "str_nan",
            "dt_nat",
            "periods_nat",
        ]

        df = DataFrame(
            {
                "int": [1, 2, 3],
                "float": [4.0, 5.0, 6.0],
                "string":
                list("abc"),
                "datetime":
                pd.date_range("20170101", periods=3),
                "timedelta":
                pd.timedelta_range("1 s", periods=3, freq="s"),
                "periods": [
                    pd.Period("2012-01"),
                    pd.Period("2012-02"),
                    pd.Period("2012-03"),
                ],
                "fl_inf": [1.0, 2.0, np.Inf],
                "fl_nan": [1.0, 2.0, np.NaN],
                "str_nan": ["aa", "bb", np.NaN],
                "dt_nat": [
                    Timestamp("20170101"),
                    Timestamp("20170203"),
                    Timestamp(None),
                ],
                "periods_nat": [
                    pd.Period("2012-01"),
                    pd.Period("2012-02"),
                    pd.Period(None),
                ],
            },
            columns=cols,
        )

        expected = DataFrame(
            {
                "int": [1.0, 2.0, 2.0],
                "float": [1.0, 2.0, 2.0],
                "string": [1.0, 2.0, 2.0],
                "datetime": [1.0, 2.0, 2.0],
                "timedelta": [1.0, 2.0, 2.0],
                "periods": [1.0, 2.0, 2.0],
                "fl_inf": [1.0, 2.0, 2.0],
                "fl_nan": [1.0, 2.0, 1.0],
                "str_nan": [1.0, 2.0, 1.0],
                "dt_nat": [1.0, 2.0, 1.0],
                "periods_nat": [1.0, 2.0, 1.0],
            },
            columns=cols,
        )

        result = df.rolling(window=2).count()
        tm.assert_frame_equal(result, expected)

        result = df.rolling(1).count()
        expected = df.notna().astype(float)
        tm.assert_frame_equal(result, expected)
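
The expected counts above follow from rolling(...).count() tallying the
non-missing entries in each window; the first window is partial. A minimal
illustration (min_periods pinned explicitly, since its default for count has
changed across pandas versions):

import numpy as np
import pandas as pd

s = pd.Series([1.0, 2.0, np.nan])
# windows: [1.0] -> 1, [1.0, 2.0] -> 2, [2.0, nan] -> 1
print(s.rolling(window=2, min_periods=0).count().tolist())  # [1.0, 2.0, 1.0]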
Esempio n. 19
0
def test_timestep_greater_than_zero_days():
    """Test trying to create zero length Timestep."""

    with pytest.raises(ValueError):
        # Test setting days <= 0 raises an error
        Timestep(pandas.Period('2019-01-01', freq='D'), 0, 0)
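
A minimal sketch of the guard this test exercises, with a hypothetical
Timestep class standing in for the library's actual implementation:

import pandas

class Timestep:
    def __init__(self, period, index, days):
        # reject zero- or negative-length timesteps
        if days <= 0:
            raise ValueError("Timestep length must be at least one day")
        self.period, self.index, self.days = period, index, days

Timestep(pandas.Period('2019-01-01', freq='D'), 0, 1)  # ok; days=0 would raise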
Esempio n. 20
0
def date_to_period(row, freq='D'):
    """ 'Date' is a string. Create a Period. Default is year, month and day. """
    date = row[common.DATE_COL]
    period = pd.Period(date, freq=freq)
    return period
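
Typical use, assuming a DataFrame whose date column is named by
common.DATE_COL (shown here as the hypothetical 'Date'):

import pandas as pd

df = pd.DataFrame({'Date': ['2020-01-02', '2020-01-03']})
periods = df.apply(lambda row: pd.Period(row['Date'], freq='D'), axis=1)
# equivalent vectorized form:
periods_fast = pd.PeriodIndex(df['Date'], freq='D')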
Esempio n. 21
0
def dashboard_sourceData(request):
    if request.method == 'GET':
        json_out = {}
        main_out = {}
        days_num = 0
        data = {}
    
        try:
            # sourcedata
            days_list = []
            day = pd.Period(datetime.datetime.now(),freq='D')
            # logger.info(type(Topic.objects))  # Topic.objects.all() and Topic.objects() return the same type
            topic_list = Topic.objects
            datatype_list = Datatype_name.objects

            today = date.today()
            post_7days = Post.objects(
                Q(pt_time__gte=datetime.datetime.combine(today - datetime.timedelta(7), datetime.time.min)) &
                Q(pt_time__lte=datetime.datetime.combine(today, datetime.time.max)))

            while days_num < 7:
                day_str = day.strftime('%Y%m%d')  

                day_change = today - datetime.timedelta(days=days_num)
                post = post_7days(
                    Q(pt_time__gte=datetime.datetime.combine(day_change, datetime.time.min)) &
                    Q(pt_time__lte=datetime.datetime.combine(day_change, datetime.time.max)))


                for topic in topic_list:
                    for datatype in datatype_list:  # renamed to avoid shadowing the outer `data` dict
                        day_dict = {}
                        day_dict['time'] = day_str
                        day_dict['topic_id'] = topic._id
                        day_dict['topic_name'] = topic.topic_name
                        day_dict['dataTypeName'] = datatype.datatype_name
                        day_dict['data_type'] = datatype.data_type
                        post_datatype = post(Q(data_type=datatype.data_type) & Q(topic_id=topic._id))
                        # logger.info('post_num = ' + str(len(post_datatype)))
                        day_dict['post_num'] = post_datatype.count()
                        days_list.append(day_dict)

                for datatype in datatype_list:
                    day_dict = {}
                    day_dict['time'] = day_str
                    day_dict['topic_id'] = 0
                    day_dict['topic_name'] = ''
                    day_dict['dataTypeName'] = datatype.datatype_name
                    day_dict['data_type'] = datatype.data_type
                    post_datatype = post(Q(data_type=datatype.data_type) & Q(topic_id=0))
                    # logger.info('post_num = ' + str(len(post_datatype)))
                    day_dict['post_num'] = post_datatype.count()
                    days_list.append(day_dict)

                day -= 1
                days_num += 1


            #######  Hot: top posts overall
            hot_dict = {}
            hot_posts = post_7days(Q(topic_id__ne=0) & (Q(data_type=3) | Q(data_type=2))) \
                .order_by('-comm_num')[:10] \
                .only("_id", "url", "board", "title", "content", "pt_time", "img_url", "poster")

            # hot_poster = post_7days.only('poster').all()
            # logger.info("hot_poster = " + str(hot_poster.count()))
    
            hot_weibo = post_7days(Q(topic_id__ne=0) & Q(data_type=2)) \
                .order_by('-comm_num')[:10] \
                .only("_id", "url", "board", "title", "content", "pt_time", "img_url")

            hot_dict['hotPost'] = handle_post_list(hot_posts)
            hot_dict['hotPoster'] = handle_poster_list(hot_posts)
            hot_dict['hotWeibo'] = handle_post_list(hot_weibo)


            # wordlist = []
            # wordres=Cloud_formain.objects.only("word", "frequency")
            # for worditem in wordres:
            #     temp={}
            #     temp['word']=worditem.word
            #     temp['weight']=worditem.frequency
            #     wordlist.append(temp)


            #######  map data
            mapData_list = [
                {'id': '001', 'pro': "陕西", 'nums': 52},
                {'id': '002', 'pro': "北京", 'nums': 100},
                {'id': '003', 'pro': "上海", 'nums': 60},
                {'id': '004', 'pro': "杭州", 'nums': 48},
                {'id': '005', 'pro': "南京", 'nums': 50},
            ]

            main_out['mapData'] = mapData_list
            main_out['sourceData'] = days_list
            main_out['Hot'] = hot_dict
            # main_out['word_cloud'] = wordlist



            json_out['code'] = 0
            json_out['success'] = True
            json_out['data'] = main_out
        except Exception:
            traceback.print_exc()
            json_out['code'] = 1
            json_out['data'] = {}
            json_out['success'] = False

        return HttpResponse(json.dumps(json_out, cls=MyEncoder),content_type="application/json")
Esempio n. 22
0
def _to_timestamp(year):
    return pd.Period(year=year, freq='Y').to_timestamp()
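
For reference, the conversion this helper performs ('Y' is the annual
frequency alias; older pandas spells it 'A'):

import pandas as pd

# the start of the annual period becomes the timestamp
assert pd.Period(year=2020, freq='Y').to_timestamp() == pd.Timestamp('2020-01-01')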
Esempio n. 23
0
class TestDataFrameAppend:
    @pytest.mark.filterwarnings(
        "ignore:.*append method is deprecated.*:FutureWarning")
    def test_append_multiindex(self, multiindex_dataframe_random_data,
                               frame_or_series):
        obj = multiindex_dataframe_random_data
        obj = tm.get_obj(obj, frame_or_series)

        a = obj[:5]
        b = obj[5:]

        result = a.append(b)
        tm.assert_equal(result, obj)

    def test_append_empty_list(self):
        # GH 28769
        df = DataFrame()
        result = df._append([])
        expected = df
        tm.assert_frame_equal(result, expected)
        assert result is not df

        df = DataFrame(np.random.randn(5, 4),
                       columns=["foo", "bar", "baz", "qux"])
        result = df._append([])
        expected = df
        tm.assert_frame_equal(result, expected)
        assert result is not df  # ._append() should return a new object

    def test_append_series_dict(self):
        df = DataFrame(np.random.randn(5, 4),
                       columns=["foo", "bar", "baz", "qux"])

        series = df.loc[4]
        msg = "Indexes have overlapping values"
        with pytest.raises(ValueError, match=msg):
            df._append(series, verify_integrity=True)

        series.name = None
        msg = "Can only append a Series if ignore_index=True"
        with pytest.raises(TypeError, match=msg):
            df._append(series, verify_integrity=True)

        result = df._append(series[::-1], ignore_index=True)
        expected = df._append(DataFrame({
            0: series[::-1]
        }, index=df.columns).T,
                              ignore_index=True)
        tm.assert_frame_equal(result, expected)

        # dict
        result = df._append(series.to_dict(), ignore_index=True)
        tm.assert_frame_equal(result, expected)

        result = df._append(series[::-1][:3], ignore_index=True)
        expected = df._append(DataFrame({
            0: series[::-1][:3]
        }).T,
                              ignore_index=True,
                              sort=True)
        tm.assert_frame_equal(result, expected.loc[:, result.columns])

        msg = "Can only append a dict if ignore_index=True"
        with pytest.raises(TypeError, match=msg):
            df._append(series.to_dict())

        # can append when name set
        row = df.loc[4]
        row.name = 5
        result = df._append(row)
        expected = df._append(df[-1:], ignore_index=True)
        tm.assert_frame_equal(result, expected)

    def test_append_list_of_series_dicts(self):
        df = DataFrame(np.random.randn(5, 4),
                       columns=["foo", "bar", "baz", "qux"])

        dicts = [x.to_dict() for idx, x in df.iterrows()]

        result = df._append(dicts, ignore_index=True)
        expected = df._append(df, ignore_index=True)
        tm.assert_frame_equal(result, expected)

        # different columns
        dicts = [
            {
                "foo": 1,
                "bar": 2,
                "baz": 3,
                "peekaboo": 4
            },
            {
                "foo": 5,
                "bar": 6,
                "baz": 7,
                "peekaboo": 8
            },
        ]
        result = df._append(dicts, ignore_index=True, sort=True)
        expected = df._append(DataFrame(dicts), ignore_index=True, sort=True)
        tm.assert_frame_equal(result, expected)

    def test_append_list_retain_index_name(self):
        df = DataFrame([[1, 2], [3, 4]],
                       index=pd.Index(["a", "b"], name="keepthisname"))

        serc = Series([5, 6], name="c")

        expected = DataFrame(
            [[1, 2], [3, 4], [5, 6]],
            index=pd.Index(["a", "b", "c"], name="keepthisname"),
        )

        # append series
        result = df._append(serc)
        tm.assert_frame_equal(result, expected)

        # append list of series
        result = df._append([serc])
        tm.assert_frame_equal(result, expected)

    def test_append_missing_cols(self):
        # GH22252
        # exercise the conditional branch in append method where the data
        # to be appended is a list and does not contain all columns that are in
        # the target DataFrame
        df = DataFrame(np.random.randn(5, 4),
                       columns=["foo", "bar", "baz", "qux"])

        dicts = [{"foo": 9}, {"bar": 10}]
        result = df._append(dicts, ignore_index=True, sort=True)

        expected = df._append(DataFrame(dicts), ignore_index=True, sort=True)
        tm.assert_frame_equal(result, expected)

    def test_append_empty_dataframe(self):

        # Empty df append empty df
        df1 = DataFrame()
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Non-empty df append empty df
        df1 = DataFrame(np.random.randn(5, 2))
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Empty df with columns append empty df
        df1 = DataFrame(columns=["bar", "foo"])
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Non-Empty df with columns append empty df
        df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"])
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

    def test_append_dtypes(self):

        # GH 5754
        # row appends of different dtypes (so need to do by-item)
        # can sometimes infer the correct type

        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5))
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": "foo"}, index=range(1, 2))
        result = df1._append(df2)
        expected = DataFrame({"bar": [Timestamp("20130101"), "foo"]})
        tm.assert_frame_equal(result, expected)

        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": np.nan}, index=range(1, 2))
        result = df1._append(df2)
        expected = DataFrame(
            {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")})
        expected = expected.astype(object)
        tm.assert_frame_equal(result, expected)

        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object)
        result = df1._append(df2)
        expected = DataFrame(
            {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")})
        expected = expected.astype(object)
        tm.assert_frame_equal(result, expected)

        df1 = DataFrame({"bar": np.nan}, index=range(1))
        df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2))
        result = df1._append(df2)
        expected = DataFrame(
            {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")})
        expected = expected.astype(object)
        tm.assert_frame_equal(result, expected)

        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object)
        result = df1._append(df2)
        expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])})
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "timestamp", ["2019-07-19 07:04:57+0100", "2019-07-19 07:04:57"])
    def test_append_timestamps_aware_or_naive(self, tz_naive_fixture,
                                              timestamp):
        # GH 30238
        tz = tz_naive_fixture
        df = DataFrame([Timestamp(timestamp, tz=tz)])
        result = df._append(df.iloc[0]).iloc[-1]
        expected = Series(Timestamp(timestamp, tz=tz), name=0)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "data, dtype",
        [
            ([1], pd.Int64Dtype()),
            ([1], pd.CategoricalDtype()),
            ([pd.Interval(left=0, right=5)], pd.IntervalDtype()),
            ([pd.Period("2000-03", freq="M")], pd.PeriodDtype("M")),
            ([1], pd.SparseDtype()),
        ],
    )
    def test_other_dtypes(self, data, dtype):
        df = DataFrame(data, dtype=dtype)
        result = df._append(df.iloc[0]).iloc[-1]
        expected = Series(data, name=0, dtype=dtype)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
    def test_append_numpy_bug_1681(self, dtype):
        # another datetime64 bug
        if dtype == "datetime64[ns]":
            index = date_range("2011/1/1", "2012/1/1", freq="W-FRI")
        else:
            index = timedelta_range("1 days", "10 days", freq="2D")

        df = DataFrame()
        other = DataFrame({"A": "foo", "B": index}, index=index)

        result = df._append(other)
        assert (result["B"] == index).all()

    @pytest.mark.filterwarnings("ignore:The values in the array:RuntimeWarning"
                                )
    def test_multiindex_column_append_multiple(self):
        # GH 29699
        df = DataFrame(
            [[1, 11], [2, 12], [3, 13]],
            columns=pd.MultiIndex.from_tuples([("multi", "col1"),
                                               ("multi", "col2")],
                                              names=["level1", None]),
        )
        df2 = df.copy()
        for i in range(1, 10):
            df[i, "colA"] = 10
            df = df._append(df2, ignore_index=True)
            result = df["multi"]
            expected = DataFrame({
                "col1": [1, 2, 3] * (i + 1),
                "col2": [11, 12, 13] * (i + 1)
            })
            tm.assert_frame_equal(result, expected)

    def test_append_raises_future_warning(self):
        # GH#35407
        df1 = DataFrame([[1, 2], [3, 4]])
        df2 = DataFrame([[5, 6], [7, 8]])
        with tm.assert_produces_warning(FutureWarning):
            df1.append(df2)
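
DataFrame.append was deprecated in pandas 1.4 and removed in 2.0 (hence the
FutureWarning asserted above); the supported replacement is pd.concat:

import pandas as pd

df1 = pd.DataFrame([[1, 2], [3, 4]])
df2 = pd.DataFrame([[5, 6], [7, 8]])
result = pd.concat([df1, df2], ignore_index=True)  # same rows, fresh 0..3 index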
Esempio n. 24
0
import numpy as np
import pandas as pd
from hamcrest import assert_that, close_to, calling, raises

import yapo as y
from conftest import decimal_places, delta
from yapo._settings import _MONTHS_PER_YEAR
from yapo.common.time_series import TimeSeriesKind

__asset_name = 'mut_ru/0890-94127385'
__portfolio_period_start = pd.Period('2011-1', freq='M')
__portfolio_period_end = pd.Period('2017-2', freq='M')
__asset = y.portfolio_asset(name=__asset_name,
                            start_period=str(__portfolio_period_start),
                            end_period=str(__portfolio_period_end),
                            currency='USD')


def test__cumulative_get_return():
    arors = __asset.get_return(kind='cumulative').values
    assert_that(arors.max(), close_to(.0924, delta))
    assert_that(arors.min(), close_to(-.5464, delta))

    arors_real = __asset.get_return(kind='cumulative', real=True).values
    assert_that(arors_real.max(), close_to(.0765, delta))
    assert_that(arors_real.min(), close_to(-.5725, delta))


def test__ytd_get_return():
    ror_ytd = __asset.get_return(kind='ytd')
    assert ror_ytd.start_period == pd.Period('2012-1', freq='M')
Esempio n. 25
0
     [1, 2],
     np.dtype("float32"),
     PandasArray(np.array([1.0, 2.0], dtype=np.dtype("float32"))),
 ),
 (np.array([1, 2],
           dtype="int64"), None, IntegerArray._from_sequence([1, 2])),
 (
     np.array([1.0, 2.0], dtype="float64"),
     None,
     FloatingArray._from_sequence([1.0, 2.0]),
 ),
 # String alias passes through to NumPy
 ([1, 2], "float32", PandasArray(np.array([1, 2], dtype="float32"))),
 # Period alias
 (
     [pd.Period("2000", "D"),
      pd.Period("2001", "D")],
     "Period[D]",
     period_array(["2000", "2001"], freq="D"),
 ),
 # Period dtype
 (
     [pd.Period("2000", "D")],
     pd.PeriodDtype("D"),
     period_array(["2000"], freq="D"),
 ),
 # Datetime (naive)
 (
     [1, 2],
     np.dtype("datetime64[ns]"),
     DatetimeArray._from_sequence(np.array([1, 2], dtype="M8[ns]")),
 ),
Esempio n. 26
0
class TestInsertIndexCoercion(CoercionBase):

    klasses = ['index']
    method = 'insert'

    def _assert_insert_conversion(self, original, value, expected,
                                  expected_dtype):
        """ test coercion triggered by insert """
        target = original.copy()
        res = target.insert(1, value)
        tm.assert_index_equal(res, expected)
        assert res.dtype == expected_dtype

    @pytest.mark.parametrize("insert, coerced_val, coerced_dtype",
                             [(1, 1, np.object), (1.1, 1.1, np.object),
                              (False, False, np.object),
                              ('x', 'x', np.object)])
    def test_insert_index_object(self, insert, coerced_val, coerced_dtype):
        obj = pd.Index(list('abcd'))
        assert obj.dtype == np.object

        exp = pd.Index(['a', coerced_val, 'b', 'c', 'd'])
        self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

    @pytest.mark.parametrize("insert, coerced_val, coerced_dtype",
                             [(1, 1, np.int64), (1.1, 1.1, np.float64),
                              (False, 0, np.int64), ('x', 'x', np.object)])
    def test_insert_index_int64(self, insert, coerced_val, coerced_dtype):
        obj = pd.Int64Index([1, 2, 3, 4])
        assert obj.dtype == np.int64

        exp = pd.Index([1, coerced_val, 2, 3, 4])
        self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

    @pytest.mark.parametrize("insert, coerced_val, coerced_dtype",
                             [(1, 1., np.float64), (1.1, 1.1, np.float64),
                              (False, 0., np.float64), ('x', 'x', np.object)])
    def test_insert_index_float64(self, insert, coerced_val, coerced_dtype):
        obj = pd.Float64Index([1., 2., 3., 4.])
        assert obj.dtype == np.float64

        exp = pd.Index([1., coerced_val, 2., 3., 4.])
        self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

    @pytest.mark.parametrize('fill_val,exp_dtype',
                             [(pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
                              (pd.Timestamp('2012-01-01', tz='US/Eastern'),
                               'datetime64[ns, US/Eastern]')],
                             ids=['datetime64', 'datetime64tz'])
    def test_insert_index_datetimes(self, fill_val, exp_dtype):
        obj = pd.DatetimeIndex(
            ['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04'],
            tz=fill_val.tz)
        assert obj.dtype == exp_dtype

        exp = pd.DatetimeIndex([
            '2011-01-01',
            fill_val.date(), '2011-01-02', '2011-01-03', '2011-01-04'
        ],
                               tz=fill_val.tz)
        self._assert_insert_conversion(obj, fill_val, exp, exp_dtype)

        msg = "Passed item and index have different timezone"
        if fill_val.tz:
            with tm.assert_raises_regex(ValueError, msg):
                obj.insert(1, pd.Timestamp('2012-01-01'))

        with tm.assert_raises_regex(ValueError, msg):
            obj.insert(1, pd.Timestamp('2012-01-01', tz='Asia/Tokyo'))

        msg = "cannot insert DatetimeIndex with incompatible label"
        with tm.assert_raises_regex(TypeError, msg):
            obj.insert(1, 1)

        pytest.xfail("ToDo: must coerce to object")

    def test_insert_index_timedelta64(self):
        obj = pd.TimedeltaIndex(['1 day', '2 day', '3 day', '4 day'])
        assert obj.dtype == 'timedelta64[ns]'

        # timedelta64 + timedelta64 => timedelta64
        exp = pd.TimedeltaIndex(['1 day', '10 day', '2 day', '3 day', '4 day'])
        self._assert_insert_conversion(obj, pd.Timedelta('10 day'), exp,
                                       'timedelta64[ns]')

        # ToDo: must coerce to object
        msg = "cannot insert TimedeltaIndex with incompatible label"
        with tm.assert_raises_regex(TypeError, msg):
            obj.insert(1, pd.Timestamp('2012-01-01'))

        # ToDo: must coerce to object
        msg = "cannot insert TimedeltaIndex with incompatible label"
        with tm.assert_raises_regex(TypeError, msg):
            obj.insert(1, 1)

    @pytest.mark.parametrize(
        "insert, coerced_val, coerced_dtype",
        [(pd.Period('2012-01', freq='M'), '2012-01', 'period[M]'),
         (pd.Timestamp('2012-01-01'), pd.Timestamp('2012-01-01'), np.object),
         (1, 1, np.object), ('x', 'x', np.object)])
    def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
        obj = pd.PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
                             freq='M')
        assert obj.dtype == 'period[M]'

        if isinstance(insert, pd.Period):
            index_type = pd.PeriodIndex
        else:
            index_type = pd.Index

        exp = index_type([
            pd.Period('2011-01', freq='M'), coerced_val,
            pd.Period('2011-02', freq='M'),
            pd.Period('2011-03', freq='M'),
            pd.Period('2011-04', freq='M')
        ],
                         freq='M')
        self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

    def test_insert_index_complex128(self):
        pass

    def test_insert_index_bool(self):
        pass
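
A compact illustration of the coercion behavior the period test pins down,
using only the public API:

import pandas as pd

idx = pd.PeriodIndex(['2011-01', '2011-02'], freq='M')
idx.insert(1, pd.Period('2012-01', freq='M'))  # stays a PeriodIndex
idx.insert(1, 'x')                             # falls back to an object-dtype Index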
Esempio n. 27
0
####################################################################

# basic date parse (string to date):
df = pd.read_csv("file.csv", parse_dates=["column_name"])

# Use the DATE column as INDEX (first convert the values to dates with parse_dates)
df = pd.read_csv("file.csv", parse_dates=["column_name"], index_col="column_name")
# now you can use a partial index to retrieve a whole month etc.
df.loc["2020-01"]
# can also add metrics:
df.loc["2020-01"].mean()
# can use a range of dates:
df.loc['2020-01-01':'2020-01-20']

# Define datetime periods:
m = pd.Period('2020-1', freq='M')
# inspect the period's start/end timestamps:
m.start_time
m.end_time
# arithmetic moves by whole periods:
m + 1  # Period('2020-02', 'M'), i.e. February


# TIMEZONES:
from pytz import all_timezones
print(all_timezones)  # show all available timezones
# python has two kinds of datetime objects: 'naive' (no timezone) and 'timezone-aware'
# assign a timezone to the date index:
df = df.tz_localize(tz='US/Eastern')
df.index  # now shows the timezone (UTC-4)
# Convert to Berlin:
df = df.tz_convert(tz='Europe/Berlin')
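
A self-contained version of the timezone steps above, with hypothetical
inline data standing in for file.csv:

import pandas as pd

df = pd.DataFrame(
    {"price": [10.0, 11.0]},
    index=pd.to_datetime(["2020-01-01 09:30", "2020-01-01 10:30"]),
)
df = df.tz_localize("US/Eastern")    # attach a timezone to the naive index
df = df.tz_convert("Europe/Berlin")  # same instants, Berlin wall-clock time
print(df.index.tz)                   # Europe/Berlin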
Esempio n. 28
0
 def test_isscalar_pandas_scalars(self):
     self.assertTrue(isscalar(pd.Timestamp('2014-01-01')))
     self.assertTrue(isscalar(pd.Timedelta(hours=1)))
     self.assertTrue(isscalar(pd.Period('2014-01-01')))
Esempio n. 29
0
# <a id="371"></a> <br>
# ### 3-7-1 Timestamp

# In[ ]:


pd.Timestamp('9/1/2016 10:05AM')

# <a id="372"></a> <br>
# ### 3-7-2 Period

# In[ ]:


pd.Period('1/2016')

# In[ ]:


pd.Period('3/5/2016')

# <a id="373"></a> <br>
# ### 3-7-3 DatetimeIndex

# In[ ]:


t1 = pd.Series(list('abc'), [pd.Timestamp('2016-09-01'), pd.Timestamp('2016-09-02'), pd.Timestamp('2016-09-03')])
t1
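
# A quick follow-up (not part of the original notebook): the DatetimeIndex
# built above converts cleanly to a PeriodIndex.

# In[ ]:


t1.index                  # DatetimeIndex(['2016-09-01', '2016-09-02', '2016-09-03'], ...)
t1.to_period(freq='D')    # same Series, indexed by daily Periods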
Esempio n. 30
0
    ("floating", [1.0, np.nan, 2.0]),
    ("integer", [1, np.nan, 2]),
    ("mixed-integer-float", [1, np.nan, 2.0]),
    ("decimal", [Decimal(1), np.nan, Decimal(2)]),
    ("boolean", [True, np.nan, False]),
    ("boolean", [True, pd.NA, False]),
    ("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]),
    ("datetime", [pd.Timestamp("20130101"), np.nan, pd.Timestamp("20180101")]),
    ("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
    # The following two dtypes are commented out due to GH 23554
    # ('complex', [1 + 1j, np.nan, 2 + 2j]),
    # ('timedelta64', [np.timedelta64(1, 'D'),
    #                  np.nan, np.timedelta64(2, 'D')]),
    ("timedelta", [timedelta(1), np.nan, timedelta(2)]),
    ("time", [time(1), np.nan, time(2)]),
    ("period", [pd.Period(2013), pd.NaT, pd.Period(2018)]),
    ("interval", [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)]),
]
ids, _ = zip(*_any_skipna_inferred_dtype)  # use inferred type as fixture-id


@pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids)
def any_skipna_inferred_dtype(request):
    """
    Fixture for all inferred dtypes from _libs.lib.infer_dtype

    The covered (inferred) types are:
    * 'string'
    * 'empty'
    * 'bytes'
    * 'mixed'