target = pro.daily(ts_code=stock + ".SH")
target = target[["trade_date", "open", "close", "high", "low"]]
target["trade_date"] = pd.to_datetime(target["trade_date"], format="%Y%m%d")
target.index = target["trade_date"]
target.sort_index(ascending=True, inplace=True)

'''
today = ts.get_realtime_quotes(stock)  # single stock symbol
today["date"] = pd.to_datetime(today["date"], format="%Y-%m-%d")
today.index = today["date"]
# Append the real-time quote to the historical data
hist.loc[today["date"].iloc[0]] = [today["date"].iloc[0], today["open"].iloc[0], today["price"].iloc[0]]
target.loc[today["date"].iloc[0]] = [today["date"].iloc[0], today["open"].iloc[0], today["price"].iloc[0]]
'''

p = pd.Period(start, freq="B") + 1
# columns: trade type, trade price, position size, available cash, total assets (CNY)
csv = pd.DataFrame(columns=['交易类型', '交易价格', '持仓数量', '可用资金', '人民币总资产'])


class Account:

    def __init__(self, capital_base, previous_date):
        self.capital_base = capital_base
        self.current_positions = []
        self.target_position = []
        self.stock_amounts = {}
        self.previous_date = pd.Period(previous_date, freq="B")
        self.cash = 10000

    def order_to(self, stock, amounts):
        print(1)
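# A minimal sketch (not part of the original script) of why the backtest
# advances dates with freq="B": adding 1 to a business-day Period skips
# weekends.
import pandas as pd

p = pd.Period("2021-01-08", freq="B")  # a Friday
print(p + 1)  # Period('2021-01-11', 'B') -- the following Monday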
def proforma_report(self, apply_inflation_rate_func, fill_forward_func, results):
    """Calculates the proforma that corresponds to participation in this value stream.

    Args:
        apply_inflation_rate_func:
        fill_forward_func:
        results (pd.DataFrame):

    Returns:
        A DataFrame with each year in opt_year as the index and the
        corresponding value this stream provided.
    """
    pro_forma = super().proforma_report(apply_inflation_rate_func, fill_forward_func, results)
    tech_id = self.unique_tech_id()
    if self.variables_df.empty:
        return pro_forma
    optimization_years = self.variables_df.index.year.unique()

    # OM COSTS
    om_costs = pd.DataFrame()
    cumulative_energy_dispatch_kw = pd.DataFrame()
    elec = self.variables_df['elec']
    udis = self.variables_df['udis']
    dis_column_name = tech_id + ' Cumulative Energy Dispatch (kW)'
    variable_column_name = tech_id + ' Variable O&M Costs'
    for year in optimization_years:
        index_yr = pd.Period(year=year, freq='y')
        # add fixed o&m costs
        om_costs.loc[index_yr, self.fixed_column_name()] = -self.fixed_om
        # add variable costs
        elec_sub = elec.loc[elec.index.year == year]
        udis_sub = udis.loc[udis.index.year == year]
        om_costs.loc[index_yr, variable_column_name] = -self.variable_om
        cumulative_energy_dispatch_kw.loc[index_yr, dis_column_name] = \
            np.sum(elec_sub) + np.sum(udis_sub)

    # fill forward (escalate rates)
    om_costs = fill_forward_func(om_costs, None, is_om_cost=True)
    # interpolate cumulative energy dispatch between analysis years,
    # being careful not to include years labeled as strings (CAPEX)
    years_list = list(filter(lambda x: not (type(x) is str), om_costs.index))
    analysis_start_year = min(years_list).year
    analysis_end_year = max(years_list).year
    cumulative_energy_dispatch_kw = self.interpolate_energy_dispatch(
        cumulative_energy_dispatch_kw, analysis_start_year, analysis_end_year, None)
    # calculate om costs in dollars, as rate * energy
    # fixed om is already in $
    # variable om
    om_costs.loc[:, variable_column_name] = (
        om_costs.loc[:, variable_column_name]
        * self.dt
        * cumulative_energy_dispatch_kw.loc[:, dis_column_name])
    # append to the superclass's proforma
    pro_forma = pd.concat([pro_forma, om_costs], axis=1)

    # fuel costs in $/kW
    fuel_costs = pd.DataFrame()
    fuel_col_name = tech_id + ' Fuel Costs'
    for year in optimization_years:
        elec_sub = elec.loc[elec.index.year == year]
        udis_sub = udis.loc[udis.index.year == year]
        # add fuel costs in $/kW
        fuel_costs.loc[pd.Period(year=year, freq='y'), fuel_col_name] = -np.sum(
            self.heat_rate * self.fuel_cost * self.dt * (elec_sub + udis_sub))
    # fill forward
    fuel_costs = fill_forward_func(fuel_costs, None)
    # append to the superclass's proforma
    pro_forma = pd.concat([pro_forma, fuel_costs], axis=1)

    return pro_forma
def test_constructor_period_incompatible_frequency(self):
    data = [pd.Period('2000', 'D'), pd.Period('2001', 'A')]
    result = pd.Series(data)
    assert result.dtype == object
    assert result.tolist() == data
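# Hedged illustration of the behavior exercised above: Periods with
# incompatible frequencies cannot share a single PeriodDtype, so the
# Series constructor falls back to object dtype.
import pandas as pd

s = pd.Series([pd.Period('2000', 'D'), pd.Period('2001', 'A')])
print(s.dtype)  # object, rather than period[D] or period[A-DEC]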
def dashboard_sourceData_temp(request):
    if request.method == 'GET':
        json_out = {}
        main_out = {}
        days_num = 0
        data = {}
        try:
            # sourcedata
            days_list = []
            topic_list = Topic.objects
            day = pd.Period(datetime.datetime.now(), freq='D')
            # logger.info(type(Topic.objects))
            # Topic.objects.all() and Topic.objects() return the same type
            datatype_list = Datatype_name.objects
            post_7days = Post.objects(
                Q(pt_time__gte=datetime.datetime.combine(
                    date.today() - datetime.timedelta(6), datetime.time.min)) &
                Q(pt_time__lte=datetime.datetime.combine(date.today(), datetime.time.max)))
            while days_num < 7:
                day_dict = {}
                day_str = day.strftime('%Y%m%d')
                day_dict['time'] = day_str
                post = post_7days(
                    Q(pt_time__gte=datetime.datetime.combine(
                        date.today() - datetime.timedelta(days=days_num), datetime.time.min)) &
                    Q(pt_time__lte=datetime.datetime.combine(
                        date.today() - datetime.timedelta(days=days_num), datetime.time.max)))
                for topic in topic_list:
                    for data in datatype_list:
                        day_dict = {}
                        day_dict['time'] = day_str
                        day_dict['topic_id'] = topic._id
                        day_dict['topic_name'] = topic.topic_name
                        day_dict['dataTypeName'] = data.datatype_name
                        day_dict['data_type'] = data.data_type
                        post_datatype = post(Q(data_type=data.data_type) & Q(topic_id=topic._id))
                        # logger.info('post_num = ' + str(len(post_datatype)))
                        day_dict['post_num'] = post_datatype.count()
                        days_list.append(day_dict)
                # for data in datatype_list:
                #     day_dict['dataTypeName'] = data.datatype_name
                #     day_dict['data_type'] = data.data_type
                #     post_datatype = post(Q(data_type=data.data_type))
                #     # logger.info('post_num = ' + str(len(post_datatype)))
                #     day_dict['post_num'] = len(post_datatype)
                #     day_dict_ = dict(day_dict)
                #     days_list.append(day_dict_)
                day -= 1
                days_num += 1

            # Hot: all hot posts
            hot_dict = {}
            hot_posts_temp = post_7days(Q(data_type=3))
            hot_posts = hot_posts_temp.order_by('-comm_num')[:10].only(
                "_id", "url", "board", "title", "content", "pt_time",
                "img_url", "comm_num", "repost_num")
            hot_weibo_temp = post_7days(Q(data_type=2))
            hot_weibo = hot_weibo_temp.order_by('-comm_num')[:10].only(
                "_id", "url", "board", "title", "content", "pt_time",
                "img_url", "comm_num", "repost_num")
            hot_poster = hot_posts_temp.order_by('-poster.post_num')[:10].only("poster")
            hot_weibouser_post_num = hot_weibo_temp.order_by('-poster.post_num').only("poster")
            hot_weibouser = []
            hot_weibouser_id = []
            num_ten = 0
            for item in hot_weibouser_post_num:
                if num_ten == 10:
                    break
                if item.poster.id not in hot_weibouser_id:
                    hot_weibouser.append(item)
                    hot_weibouser_id.append(item.poster.id)
                    num_ten = num_ten + 1
            hot_dict['hotPost'] = handle_post_list1(hot_posts)
            hot_dict['hotPoster'] = handle_poster_list1(hot_poster)
            hot_dict['hotWeibo'] = handle_post_list1(hot_weibo)
            # hot_dict['hotPoster'] = handle_poster_list(hot_posts)
            hot_dict['hotWeiboUser'] = handle_weibouser_list1(hot_weibouser)

            # map data
            mapData_list = [
                {'id': '001', 'pro': "陕西", 'nums': 52},
                {'id': '002', 'pro': "北京", 'nums': 100},
                {'id': '003', 'pro': "上海", 'nums': 60},
                {'id': '004', 'pro': "杭州", 'nums': 48},
                {'id': '005', 'pro': "南京", 'nums': 50},
            ]

            wordlist = []
            wordres = Cloud_formain.objects(Q(topic_id=999)).only("word", "frequency")
            for worditem in wordres:
                temp = {}
                temp['word'] = worditem.word
                temp['weight'] = worditem.frequency
                wordlist.append(temp)

            main_out['mapData'] = mapData_list
            main_out['sourceData'] = days_list
            main_out['Hot'] = hot_dict
            main_out['word_cloud'] = wordlist
            json_out['code'] = 0
            json_out['success'] = True
            json_out['data'] = main_out
        except:
            traceback.print_exc()
            json_out['code'] = 1
            json_out['data'] = {}
            json_out['success'] = False
        return HttpResponse(json.dumps(json_out, cls=MyEncoder), content_type="application/json")
    # Since right now, by default MI will drop NA from levels when we create MI
    # via `from_*`, so we need to add NA for level manually afterwards.
    if not dropna:
        mi = mi.set_levels(["A", "B", np.nan], level="b")
    expected = pd.DataFrame(outputs, index=mi)
    tm.assert_frame_equal(grouped, expected)


@pytest.mark.parametrize(
    "datetime1, datetime2",
    [
        (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-02-01")),
        (pd.Timedelta("-2 days"), pd.Timedelta("-1 days")),
        (pd.Period("2020-01-01"), pd.Period("2020-02-01")),
    ],
)
@pytest.mark.parametrize("dropna, values", [(True, [12, 3]), (False, [12, 3, 6])])
def test_groupby_dropna_datetime_like_data(
    dropna, values, datetime1, datetime2, unique_nulls_fixture, unique_nulls_fixture2
):
    # 3729
    df = pd.DataFrame(
        {
            "values": [1, 2, 3, 4, 5, 6],
            "dt": [
                datetime1,
                unique_nulls_fixture,
                datetime2,
                unique_nulls_fixture2,
class TestSeriesReplace:

    def test_replace_explicit_none(self):
        # GH#36984 if the user explicitly passes value=None, give it to them
        ser = pd.Series([0, 0, ""], dtype=object)
        result = ser.replace("", None)
        expected = pd.Series([0, 0, None], dtype=object)
        tm.assert_series_equal(result, expected)

        df = pd.DataFrame(np.zeros((3, 3)))
        df.iloc[2, 2] = ""
        result = df.replace("", None)
        expected = pd.DataFrame(
            {
                0: np.zeros(3),
                1: np.zeros(3),
                2: np.array([0.0, 0.0, None], dtype=object),
            }
        )
        assert expected.iloc[2, 2] is None
        tm.assert_frame_equal(result, expected)

        # GH#19998 same thing with object dtype
        ser = pd.Series([10, 20, 30, "a", "a", "b", "a"])
        result = ser.replace("a", None)
        expected = pd.Series([10, 20, 30, None, None, "b", None])
        assert expected.iloc[-1] is None
        tm.assert_series_equal(result, expected)

    def test_replace_numpy_nan(self, nulls_fixture):
        # GH#45725 ensure numpy.nan can be replaced with all other null types
        to_replace = np.nan
        value = nulls_fixture
        dtype = object
        ser = pd.Series([to_replace], dtype=dtype)
        expected = pd.Series([value], dtype=dtype)

        result = ser.replace({to_replace: value}).astype(dtype=dtype)
        tm.assert_series_equal(result, expected)
        assert result.dtype == dtype

        # same thing but different calling convention
        result = ser.replace(to_replace, value).astype(dtype=dtype)
        tm.assert_series_equal(result, expected)
        assert result.dtype == dtype

    def test_replace_noop_doesnt_downcast(self):
        # GH#44498
        ser = pd.Series([None, None, pd.Timestamp("2021-12-16 17:31")], dtype=object)
        res = ser.replace({np.nan: None})  # should be a no-op
        tm.assert_series_equal(res, ser)
        assert res.dtype == object

        # same thing but different calling convention
        res = ser.replace(np.nan, None)
        tm.assert_series_equal(res, ser)
        assert res.dtype == object

    def test_replace(self):
        N = 100
        ser = pd.Series(np.random.randn(N))
        ser[0:4] = np.nan
        ser[6:10] = 0

        # replace list with a single value
        return_value = ser.replace([np.nan], -1, inplace=True)
        assert return_value is None

        exp = ser.fillna(-1)
        tm.assert_series_equal(ser, exp)

        rs = ser.replace(0.0, np.nan)
        ser[ser == 0.0] = np.nan
        tm.assert_series_equal(rs, ser)

        ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object)
        ser[:5] = np.nan
        ser[6:10] = "foo"
        ser[20:30] = "bar"

        # replace list with a single value
        rs = ser.replace([np.nan, "foo", "bar"], -1)

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -1).all()
        assert (rs[20:30] == -1).all()
        assert (pd.isna(ser[:5])).all()

        # replace with different values
        rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -2).all()
        assert (rs[20:30] == -3).all()
        assert (pd.isna(ser[:5])).all()

        # replace with different values with 2 lists
        rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
        tm.assert_series_equal(rs, rs2)

        # replace inplace
        return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True)
        assert return_value is None

        assert (ser[:5] == -1).all()
        assert (ser[6:10] == -1).all()
        assert (ser[20:30] == -1).all()

    def test_replace_nan_with_inf(self):
        ser = pd.Series([np.nan, 0, np.inf])
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))

        ser = pd.Series([np.nan, 0, "foo", "bar", np.inf, None, pd.NaT])
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
        filled = ser.copy()
        filled[4] = 0
        tm.assert_series_equal(ser.replace(np.inf, 0), filled)

    def test_replace_listlike_value_listlike_target(self, datetime_series):
        ser = pd.Series(datetime_series.index)
        tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))

        # malformed
        msg = r"Replacement lists must match in length\. Expecting 3 got 2"
        with pytest.raises(ValueError, match=msg):
            ser.replace([1, 2, 3], [np.nan, 0])

        # ser is dt64 so can't hold 1 or 2, so this replace is a no-op
        result = ser.replace([1, 2], [np.nan, 0])
        tm.assert_series_equal(result, ser)

        ser = pd.Series([0, 1, 2, 3, 4])
        result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])
        tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0]))

    def test_replace_gh5319(self):
        # API change from 0.12?
        # GH 5319
        ser = pd.Series([0, np.nan, 2, 3, 4])
        expected = ser.ffill()
        result = ser.replace([np.nan])
        tm.assert_series_equal(result, expected)

        ser = pd.Series([0, np.nan, 2, 3, 4])
        expected = ser.ffill()
        result = ser.replace(np.nan)
        tm.assert_series_equal(result, expected)

    def test_replace_datetime64(self):
        # GH 5797
        ser = pd.Series(pd.date_range("20130101", periods=5))
        expected = ser.copy()
        expected.loc[2] = pd.Timestamp("20120101")
        result = ser.replace({pd.Timestamp("20130103"): pd.Timestamp("20120101")})
        tm.assert_series_equal(result, expected)
        result = ser.replace(pd.Timestamp("20130103"), pd.Timestamp("20120101"))
        tm.assert_series_equal(result, expected)

    def test_replace_nat_with_tz(self):
        # GH 11792: Test with replacing NaT in a list with tz data
        ts = pd.Timestamp("2015/01/01", tz="UTC")
        s = pd.Series([pd.NaT, pd.Timestamp("2015/01/01", tz="UTC")])
        result = s.replace([np.nan, pd.NaT], pd.Timestamp.min)
        expected = pd.Series([pd.Timestamp.min, ts], dtype=object)
        tm.assert_series_equal(expected, result)

    def test_replace_timedelta_td64(self):
        tdi = pd.timedelta_range(0, periods=5)
        ser = pd.Series(tdi)

        # Using a single dict argument means we go through replace_list
        result = ser.replace({ser[1]: ser[3]})

        expected = pd.Series([ser[0], ser[3], ser[2], ser[3], ser[4]])
        tm.assert_series_equal(result, expected)

    def test_replace_with_single_list(self):
        ser = pd.Series([0, 1, 2, 3, 4])
        result = ser.replace([1, 2, 3])
        tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4]))

        s = ser.copy()
        return_value = s.replace([1, 2, 3], inplace=True)
        assert return_value is None
        tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4]))

        # make sure things don't get corrupted when fillna call fails
        s = ser.copy()
        msg = (
            r"Invalid fill method\. Expecting pad \(ffill\) or backfill "
            r"\(bfill\)\. Got crash_cymbal"
        )
        with pytest.raises(ValueError, match=msg):
            return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal")
        assert return_value is None
        tm.assert_series_equal(s, ser)

    def test_replace_mixed_types(self):
        ser = pd.Series(np.arange(5), dtype="int64")

        def check_replace(to_rep, val, expected):
            sc = ser.copy()
            result = ser.replace(to_rep, val)
            return_value = sc.replace(to_rep, val, inplace=True)
            assert return_value is None
            tm.assert_series_equal(expected, result)
            tm.assert_series_equal(expected, sc)

        # 3.0 can still be held in our int64 series, so we do not upcast GH#44940
        tr, v = [3], [3.0]
        check_replace(tr, v, ser)
        # Note this matches what we get with the scalars 3 and 3.0
        check_replace(tr[0], v[0], ser)

        # MUST upcast to float
        e = pd.Series([0, 1, 2, 3.5, 4])
        tr, v = [3], [3.5]
        check_replace(tr, v, e)

        # casts to object
        e = pd.Series([0, 1, 2, 3.5, "a"])
        tr, v = [3, 4], [3.5, "a"]
        check_replace(tr, v, e)

        # again casts to object
        e = pd.Series([0, 1, 2, 3.5, pd.Timestamp("20130101")])
        tr, v = [3, 4], [3.5, pd.Timestamp("20130101")]
        check_replace(tr, v, e)

        # casts to object
        e = pd.Series([0, 1, 2, 3.5, True], dtype="object")
        tr, v = [3, 4], [3.5, True]
        check_replace(tr, v, e)

        # test an object with dates + floats + integers + strings
        dr = pd.Series(pd.date_range("1/1/2001", "1/10/2001", freq="D"))
        result = dr.astype(object).replace([dr[0], dr[1], dr[2]], [1.0, 2, "a"])
        expected = pd.Series([1.0, 2, "a"] + dr[3:].tolist(), dtype=object)
        tm.assert_series_equal(result, expected)

    def test_replace_bool_with_string_no_op(self):
        s = pd.Series([True, False, True])
        result = s.replace("fun", "in-the-sun")
        tm.assert_series_equal(s, result)

    def test_replace_bool_with_string(self):
        # nonexistent elements
        s = pd.Series([True, False, True])
        result = s.replace(True, "2u")
        expected = pd.Series(["2u", False, "2u"])
        tm.assert_series_equal(expected, result)

    def test_replace_bool_with_bool(self):
        s = pd.Series([True, False, True])
        result = s.replace(True, False)
        expected = pd.Series([False] * len(s))
        tm.assert_series_equal(expected, result)

    def test_replace_with_dict_with_bool_keys(self):
        s = pd.Series([True, False, True])
        result = s.replace({"asdf": "asdb", True: "yes"})
        expected = pd.Series(["yes", False, "yes"])
        tm.assert_series_equal(result, expected)

    def test_replace_Int_with_na(self, any_int_ea_dtype):
        # GH 38267
        result = pd.Series([0, None], dtype=any_int_ea_dtype).replace(0, pd.NA)
        expected = pd.Series([pd.NA, pd.NA], dtype=any_int_ea_dtype)
        tm.assert_series_equal(result, expected)
        result = pd.Series([0, 1], dtype=any_int_ea_dtype).replace(0, pd.NA)
        result.replace(1, pd.NA, inplace=True)
        tm.assert_series_equal(result, expected)

    def test_replace2(self):
        N = 100
        ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object)
        ser[:5] = np.nan
        ser[6:10] = "foo"
        ser[20:30] = "bar"

        # replace list with a single value
        rs = ser.replace([np.nan, "foo", "bar"], -1)

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -1).all()
        assert (rs[20:30] == -1).all()
        assert (pd.isna(ser[:5])).all()

        # replace with different values
        rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})

        assert (rs[:5] == -1).all()
        assert (rs[6:10] == -2).all()
        assert (rs[20:30] == -3).all()
        assert (pd.isna(ser[:5])).all()

        # replace with different values with 2 lists
        rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
        tm.assert_series_equal(rs, rs2)

        # replace inplace
        return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True)
        assert return_value is None
        assert (ser[:5] == -1).all()
        assert (ser[6:10] == -1).all()
        assert (ser[20:30] == -1).all()

    def test_replace_with_dictlike_and_string_dtype(self, nullable_string_dtype):
        # GH 32621, GH#44940
        ser = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype)
        expected = pd.Series(["1", "2", np.nan], dtype=nullable_string_dtype)
        result = ser.replace({"one": "1", "two": "2"})
        tm.assert_series_equal(expected, result)

    def test_replace_with_empty_dictlike(self):
        # GH 15289
        s = pd.Series(list("abcd"))
        tm.assert_series_equal(s, s.replace({}))

        with tm.assert_produces_warning(FutureWarning):
            empty_series = pd.Series([])
        tm.assert_series_equal(s, s.replace(empty_series))

    def test_replace_string_with_number(self):
        # GH 15743
        s = pd.Series([1, 2, 3])
        result = s.replace("2", np.nan)
        expected = pd.Series([1, 2, 3])
        tm.assert_series_equal(expected, result)

    def test_replace_replacer_equals_replacement(self):
        # GH 20656
        # make sure all replacers are matching against original values
        s = pd.Series(["a", "b"])
        expected = pd.Series(["b", "a"])
        result = s.replace({"a": "b", "b": "a"})
        tm.assert_series_equal(expected, result)

    def test_replace_unicode_with_number(self):
        # GH 15743
        s = pd.Series([1, 2, 3])
        result = s.replace("2", np.nan)
        expected = pd.Series([1, 2, 3])
        tm.assert_series_equal(expected, result)

    def test_replace_mixed_types_with_string(self):
        # Testing mixed
        s = pd.Series([1, 2, 3, "4", 4, 5])
        result = s.replace([2, "4"], np.nan)
        expected = pd.Series([1, np.nan, 3, np.nan, 4, 5])
        tm.assert_series_equal(expected, result)

    @pytest.mark.parametrize(
        "categorical, numeric",
        [
            (pd.Categorical(["A"], categories=["A", "B"]), [1]),
            (pd.Categorical(["A", "B"], categories=["A", "B"]), [1, 2]),
        ],
    )
    def test_replace_categorical(self, categorical, numeric):
        # GH 24971, GH#23305
        ser = pd.Series(categorical)
        result = ser.replace({"A": 1, "B": 2})
        expected = pd.Series(numeric).astype("category")
        if 2 not in expected.cat.categories:
            # i.e. categories should be [1, 2] even if there are no "B"s present
            # GH#44940
            expected = expected.cat.add_categories(2)
        tm.assert_series_equal(expected, result)

    def test_replace_categorical_single(self):
        # GH 26988
        dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
        s = pd.Series(dti)
        c = s.astype("category")

        expected = c.copy()
        expected = expected.cat.add_categories("foo")
        expected[2] = "foo"
        expected = expected.cat.remove_unused_categories()
        assert c[2] != "foo"

        result = c.replace(c[2], "foo")
        tm.assert_series_equal(expected, result)
        assert c[2] != "foo"  # ensure non-inplace call does not alter original

        return_value = c.replace(c[2], "foo", inplace=True)
        assert return_value is None
        tm.assert_series_equal(expected, c)

        first_value = c[0]
        return_value = c.replace(c[1], c[0], inplace=True)
        assert return_value is None
        assert c[0] == c[1] == first_value  # test replacing with existing value

    def test_replace_with_no_overflowerror(self):
        # GH 25616
        # casts to object without Exception from OverflowError
        s = pd.Series([0, 1, 2, 3, 4])
        result = s.replace([3], ["100000000000000000000"])
        expected = pd.Series([0, 1, 2, "100000000000000000000", 4])
        tm.assert_series_equal(result, expected)

        s = pd.Series([0, "100000000000000000000", "100000000000000000001"])
        result = s.replace(["100000000000000000000"], [1])
        expected = pd.Series([0, 1, "100000000000000000001"])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "ser, to_replace, exp",
        [
            ([1, 2, 3], {1: 2, 2: 3, 3: 4}, [2, 3, 4]),
            (["1", "2", "3"], {"1": "2", "2": "3", "3": "4"}, ["2", "3", "4"]),
        ],
    )
    def test_replace_commutative(self, ser, to_replace, exp):
        # GH 16051
        # DataFrame.replace() overwrites when values are non-numeric
        series = pd.Series(ser)

        expected = pd.Series(exp)
        result = series.replace(to_replace)

        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "ser, exp", [([1, 2, 3], [1, True, 3]), (["x", 2, 3], ["x", True, 3])]
    )
    def test_replace_no_cast(self, ser, exp):
        # GH 9113
        # BUG: replace int64 dtype with bool coerces to int64
        series = pd.Series(ser)
        result = series.replace(2, True)
        expected = pd.Series(exp)

        tm.assert_series_equal(result, expected)

    def test_replace_invalid_to_replace(self):
        # GH 18634
        # API: replace() should raise an exception if invalid argument is given
        series = pd.Series(["a", "b", "c "])
        msg = (
            r"Expecting 'to_replace' to be either a scalar, array-like, "
            r"dict or None, got invalid type.*"
        )
        with pytest.raises(TypeError, match=msg):
            series.replace(lambda x: x.strip())

    @pytest.mark.parametrize("frame", [False, True])
    def test_replace_nonbool_regex(self, frame):
        obj = pd.Series(["a", "b", "c "])
        if frame:
            obj = obj.to_frame()

        msg = "'to_replace' must be 'None' if 'regex' is not a bool"
        with pytest.raises(ValueError, match=msg):
            obj.replace(to_replace=["a"], regex="foo")

    @pytest.mark.parametrize("frame", [False, True])
    def test_replace_empty_copy(self, frame):
        obj = pd.Series([], dtype=np.float64)
        if frame:
            obj = obj.to_frame()

        res = obj.replace(4, 5, inplace=True)
        assert res is None

        res = obj.replace(4, 5, inplace=False)
        tm.assert_equal(res, obj)
        assert res is not obj

    def test_replace_only_one_dictlike_arg(self, fixed_now_ts):
        # GH#33340

        ser = pd.Series([1, 2, "A", fixed_now_ts, True])
        to_replace = {0: 1, 2: "A"}
        value = "foo"
        msg = "Series.replace cannot use dict-like to_replace and non-None value"
        with pytest.raises(ValueError, match=msg):
            ser.replace(to_replace, value)

        to_replace = 1
        value = {0: "foo", 2: "bar"}
        msg = "Series.replace cannot use dict-value and non-None to_replace"
        with pytest.raises(ValueError, match=msg):
            ser.replace(to_replace, value)

    def test_replace_extension_other(self, frame_or_series):
        # https://github.com/pandas-dev/pandas/issues/34530
        obj = frame_or_series(pd.array([1, 2, 3], dtype="Int64"))
        result = obj.replace("", "")  # no exception
        # should not have changed dtype
        tm.assert_equal(obj, result)

    def _check_replace_with_method(self, ser: pd.Series):
        df = ser.to_frame()

        res = ser.replace(ser[1], method="pad")
        expected = pd.Series([ser[0], ser[0]] + list(ser[2:]), dtype=ser.dtype)
        tm.assert_series_equal(res, expected)

        res_df = df.replace(ser[1], method="pad")
        tm.assert_frame_equal(res_df, expected.to_frame())

        ser2 = ser.copy()
        res2 = ser2.replace(ser[1], method="pad", inplace=True)
        assert res2 is None
        tm.assert_series_equal(ser2, expected)

        res_df2 = df.replace(ser[1], method="pad", inplace=True)
        assert res_df2 is None
        tm.assert_frame_equal(df, expected.to_frame())

    def test_replace_ea_dtype_with_method(self, any_numeric_ea_dtype):
        arr = pd.array([1, 2, pd.NA, 4], dtype=any_numeric_ea_dtype)
        ser = pd.Series(arr)

        self._check_replace_with_method(ser)

    @pytest.mark.parametrize("as_categorical", [True, False])
    def test_replace_interval_with_method(self, as_categorical):
        # in particular interval that can't hold NA

        idx = pd.IntervalIndex.from_breaks(range(4))
        ser = pd.Series(idx)
        if as_categorical:
            ser = ser.astype("category")

        self._check_replace_with_method(ser)

    @pytest.mark.parametrize("as_period", [True, False])
    @pytest.mark.parametrize("as_categorical", [True, False])
    def test_replace_datetimelike_with_method(self, as_period, as_categorical):
        idx = pd.date_range("2016-01-01", periods=5, tz="US/Pacific")
        if as_period:
            idx = idx.tz_localize(None).to_period("D")

        ser = pd.Series(idx)
        ser.iloc[-2] = pd.NaT
        if as_categorical:
            ser = ser.astype("category")

        self._check_replace_with_method(ser)

    def test_replace_with_compiled_regex(self):
        # https://github.com/pandas-dev/pandas/issues/35680
        s = pd.Series(["a", "b", "c"])
        regex = re.compile("^a$")
        result = s.replace({regex: "z"}, regex=True)
        expected = pd.Series(["z", "b", "c"])
        tm.assert_series_equal(result, expected)

    def test_pandas_replace_na(self):
        # GH#43344
        ser = pd.Series(["AA", "BB", "CC", "DD", "EE", "", pd.NA], dtype="string")
        regex_mapping = {
            "AA": "CC",
            "BB": "CC",
            "EE": "CC",
            "CC": "CC-REPL",
        }
        result = ser.replace(regex_mapping, regex=True)
        exp = pd.Series(["CC", "CC", "CC-REPL", "DD", "CC", "", pd.NA], dtype="string")
        tm.assert_series_equal(result, exp)

    @pytest.mark.parametrize(
        "dtype, input_data, to_replace, expected_data",
        [
            ("bool", [True, False], {True: False}, [False, False]),
            ("int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
            ("Int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
            ("float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
            ("Float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
            ("string", ["one", "two"], {"one": "1", "two": "2"}, ["1", "2"]),
            (
                pd.IntervalDtype("int64"),
                IntervalArray([pd.Interval(1, 2), pd.Interval(2, 3)]),
                {pd.Interval(1, 2): pd.Interval(10, 20)},
                IntervalArray([pd.Interval(10, 20), pd.Interval(2, 3)]),
            ),
            (
                pd.IntervalDtype("float64"),
                IntervalArray([pd.Interval(1.0, 2.7), pd.Interval(2.8, 3.1)]),
                {pd.Interval(1.0, 2.7): pd.Interval(10.6, 20.8)},
                IntervalArray([pd.Interval(10.6, 20.8), pd.Interval(2.8, 3.1)]),
            ),
            (
                pd.PeriodDtype("M"),
                [pd.Period("2020-05", freq="M")],
                {pd.Period("2020-05", freq="M"): pd.Period("2020-06", freq="M")},
                [pd.Period("2020-06", freq="M")],
            ),
        ],
    )
    def test_replace_dtype(self, dtype, input_data, to_replace, expected_data):
        # GH#33484
        ser = pd.Series(input_data, dtype=dtype)
        result = ser.replace(to_replace)
        expected = pd.Series(expected_data, dtype=dtype)
        tm.assert_series_equal(result, expected)

    def test_replace_string_dtype(self):
        # GH#40732, GH#44940
        ser = pd.Series(["one", "two", np.nan], dtype="string")
        res = ser.replace({"one": "1", "two": "2"})
        expected = pd.Series(["1", "2", np.nan], dtype="string")
        tm.assert_series_equal(res, expected)

        # GH#31644
        ser2 = pd.Series(["A", np.nan], dtype="string")
        res2 = ser2.replace("A", "B")
        expected2 = pd.Series(["B", np.nan], dtype="string")
        tm.assert_series_equal(res2, expected2)

        ser3 = pd.Series(["A", "B"], dtype="string")
        res3 = ser3.replace("A", pd.NA)
        expected3 = pd.Series([pd.NA, "B"], dtype="string")
        tm.assert_series_equal(res3, expected3)

    def test_replace_string_dtype_list_to_replace(self):
        # GH#41215, GH#44940
        ser = pd.Series(["abc", "def"], dtype="string")
        res = ser.replace(["abc", "any other string"], "xyz")
        expected = pd.Series(["xyz", "def"], dtype="string")
        tm.assert_series_equal(res, expected)

    def test_replace_string_dtype_regex(self):
        # GH#31644
        ser = pd.Series(["A", "B"], dtype="string")
        res = ser.replace(r".", "C", regex=True)
        expected = pd.Series(["C", "C"], dtype="string")
        tm.assert_series_equal(res, expected)

    def test_replace_nullable_numeric(self):
        # GH#40732, GH#44940

        floats = pd.Series([1.0, 2.0, 3.999, 4.4], dtype=pd.Float64Dtype())
        assert floats.replace({1.0: 9}).dtype == floats.dtype
        assert floats.replace(1.0, 9).dtype == floats.dtype
        assert floats.replace({1.0: 9.0}).dtype == floats.dtype
        assert floats.replace(1.0, 9.0).dtype == floats.dtype

        res = floats.replace(to_replace=[1.0, 2.0], value=[9.0, 10.0])
        assert res.dtype == floats.dtype

        ints = pd.Series([1, 2, 3, 4], dtype=pd.Int64Dtype())
        assert ints.replace({1: 9}).dtype == ints.dtype
        assert ints.replace(1, 9).dtype == ints.dtype
        assert ints.replace({1: 9.0}).dtype == ints.dtype
        assert ints.replace(1, 9.0).dtype == ints.dtype
        # FIXME: ints.replace({1: 9.5}) raises bc of incorrect _can_hold_element

    @pytest.mark.parametrize("regex", [False, True])
    def test_replace_regex_dtype_series(self, regex):
        # GH-48644
        series = pd.Series(["0"])
        expected = pd.Series([1])
        result = series.replace(to_replace="0", value=1, regex=regex)
        tm.assert_series_equal(result, expected)

    def test_replace_different_int_types(self, any_int_numpy_dtype):
        # GH#45311
        labs = pd.Series([1, 1, 1, 0, 0, 2, 2, 2], dtype=any_int_numpy_dtype)

        maps = pd.Series([0, 2, 1], dtype=any_int_numpy_dtype)
        map_dict = {old: new for (old, new) in zip(maps.values, maps.index)}

        result = labs.replace(map_dict)
        expected = labs.replace({0: 0, 2: 1, 1: 2})
        tm.assert_series_equal(result, expected)
class TestInsertIndexCoercion(CoercionBase):

    klasses = ["index"]
    method = "insert"

    def _assert_insert_conversion(self, original, value, expected, expected_dtype):
        """test coercion triggered by insert"""
        target = original.copy()
        res = target.insert(1, value)
        tm.assert_index_equal(res, expected)
        assert res.dtype == expected_dtype

    @pytest.mark.parametrize(
        "insert, coerced_val, coerced_dtype",
        [
            (1, 1, object),
            (1.1, 1.1, object),
            (False, False, object),
            ("x", "x", object),
        ],
    )
    def test_insert_index_object(self, insert, coerced_val, coerced_dtype):
        obj = pd.Index(list("abcd"))
        assert obj.dtype == object

        exp = pd.Index(["a", coerced_val, "b", "c", "d"])
        self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

    @pytest.mark.parametrize(
        "insert, coerced_val, coerced_dtype",
        [
            (1, 1, np.int64),
            (1.1, 1.1, np.float64),
            (False, False, object),  # GH#36319
            ("x", "x", object),
        ],
    )
    def test_insert_index_int64(self, insert, coerced_val, coerced_dtype):
        obj = Int64Index([1, 2, 3, 4])
        assert obj.dtype == np.int64

        exp = pd.Index([1, coerced_val, 2, 3, 4])
        self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

    @pytest.mark.parametrize(
        "insert, coerced_val, coerced_dtype",
        [
            (1, 1.0, np.float64),
            (1.1, 1.1, np.float64),
            (False, False, object),  # GH#36319
            ("x", "x", object),
        ],
    )
    def test_insert_index_float64(self, insert, coerced_val, coerced_dtype):
        obj = Float64Index([1.0, 2.0, 3.0, 4.0])
        assert obj.dtype == np.float64

        exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0])
        self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

    @pytest.mark.parametrize(
        "fill_val,exp_dtype",
        [
            (pd.Timestamp("2012-01-01"), "datetime64[ns]"),
            (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"),
        ],
        ids=["datetime64", "datetime64tz"],
    )
    @pytest.mark.parametrize(
        "insert_value",
        [pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), 1],
    )
    def test_insert_index_datetimes(self, request, fill_val, exp_dtype, insert_value):
        obj = pd.DatetimeIndex(
            ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], tz=fill_val.tz
        )
        assert obj.dtype == exp_dtype

        exp = pd.DatetimeIndex(
            ["2011-01-01", fill_val.date(), "2011-01-02", "2011-01-03", "2011-01-04"],
            tz=fill_val.tz,
        )
        self._assert_insert_conversion(obj, fill_val, exp, exp_dtype)

        if fill_val.tz:
            # mismatched tzawareness
            ts = pd.Timestamp("2012-01-01")
            result = obj.insert(1, ts)
            expected = obj.astype(object).insert(1, ts)
            assert expected.dtype == object
            tm.assert_index_equal(result, expected)

            # mismatched tz --> cast to object (could reasonably cast to common tz)
            ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo")
            with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"):
                result = obj.insert(1, ts)
            # once deprecation is enforced:
            # expected = obj.insert(1, ts.tz_convert(obj.dtype.tz))
            # assert expected.dtype == obj.dtype
            expected = obj.astype(object).insert(1, ts)
            tm.assert_index_equal(result, expected)

        else:
            # mismatched tzawareness
            ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo")
            result = obj.insert(1, ts)
            expected = obj.astype(object).insert(1, ts)
            assert expected.dtype == object
            tm.assert_index_equal(result, expected)

        item = 1
        result = obj.insert(1, item)
        expected = obj.astype(object).insert(1, item)
        assert expected[1] == item
        assert expected.dtype == object
        tm.assert_index_equal(result, expected)

    def test_insert_index_timedelta64(self):
        obj = pd.TimedeltaIndex(["1 day", "2 day", "3 day", "4 day"])
        assert obj.dtype == "timedelta64[ns]"

        # timedelta64 + timedelta64 => timedelta64
        exp = pd.TimedeltaIndex(["1 day", "10 day", "2 day", "3 day", "4 day"])
        self._assert_insert_conversion(obj, pd.Timedelta("10 day"), exp, "timedelta64[ns]")

        for item in [pd.Timestamp("2012-01-01"), 1]:
            result = obj.insert(1, item)
            expected = obj.astype(object).insert(1, item)
            assert expected.dtype == object
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "insert, coerced_val, coerced_dtype",
        [
            (pd.Period("2012-01", freq="M"), "2012-01", "period[M]"),
            (pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01"), object),
            (1, 1, object),
            ("x", "x", object),
        ],
    )
    def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
        obj = pd.PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq="M")
        assert obj.dtype == "period[M]"

        data = [
            pd.Period("2011-01", freq="M"),
            coerced_val,
            pd.Period("2011-02", freq="M"),
            pd.Period("2011-03", freq="M"),
            pd.Period("2011-04", freq="M"),
        ]
        if isinstance(insert, pd.Period):
            exp = pd.PeriodIndex(data, freq="M")
            self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

            # string that can be parsed to appropriate PeriodDtype
            self._assert_insert_conversion(obj, str(insert), exp, coerced_dtype)

        else:
            result = obj.insert(0, insert)
            expected = obj.astype(object).insert(0, insert)
            tm.assert_index_equal(result, expected)

            # TODO: ATM inserting '2012-01-01 00:00:00' when we have obj.freq=="M"
            #  casts that string to Period[M], not clear that is desirable
            if not isinstance(insert, pd.Timestamp):
                # non-castable string
                result = obj.insert(0, str(insert))
                expected = obj.astype(object).insert(0, str(insert))
                tm.assert_index_equal(result, expected)

            msg = r"Unexpected keyword arguments {'freq'}"
            with pytest.raises(TypeError, match=msg):
                with tm.assert_produces_warning(FutureWarning):
                    # passing keywords to pd.Index
                    pd.Index(data, freq="M")

    @pytest.mark.xfail(reason="Test not implemented")
    def test_insert_index_complex128(self):
        raise NotImplementedError

    @pytest.mark.xfail(reason="Test not implemented")
    def test_insert_index_bool(self):
        raise NotImplementedError
    assert result is array


def test_array_multiindex_raises():
    idx = pd.MultiIndex.from_product([['A'], ['a', 'b']])
    with pytest.raises(ValueError, match='MultiIndex'):
        idx.array


@pytest.mark.parametrize('array, expected', [
    (np.array([1, 2], dtype=np.int64), np.array([1, 2], dtype=np.int64)),
    (pd.Categorical(['a', 'b']), np.array(['a', 'b'], dtype=object)),
    (pd.core.arrays.period_array(['2000', '2001'], freq='D'),
     np.array([pd.Period('2000', freq="D"), pd.Period('2001', freq='D')])),
    (pd.core.arrays.integer_array([0, np.nan]),
     np.array([0, np.nan], dtype=object)),
    (pd.core.arrays.IntervalArray.from_breaks([0, 1, 2]),
     np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object)),
    (pd.SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)),
    # TODO: DatetimeArray(add)
])
@pytest.mark.parametrize('box', [pd.Series, pd.Index])
def test_to_numpy(array, expected, box):
    thing = box(array)

    if array.dtype.name in ('Int64', 'Sparse[int64, 0]') and box is pd.Index:
        pytest.skip("No index type for {}".format(array.dtype))
def main():
    # read in data
    indPath = "Data/30_Industry_Portfolios.CSV"
    rfPath = "Data/rf.csv"
    begDate = "195912"
    endDate = "201612"
    (ind, rf) = loadData(indPath, rfPath, begDate, endDate)
    exsReturns = excessReturns(ind, rf)
    nrow = rf.count()[0]

    # create summary table (table 1 in the paper)
    sumTable = summaryStat(exsReturns.iloc[:, 1:])  # no date column

    # OLS post-Lasso over the entire time period
    indNames = list(exsReturns.iloc[:, 1:])
    df = exsReturns  # with date column
    (inter, fullPeriodResult) = OLSlassoRegression(df)  # use aic

    # expanding-window long/short portfolio construction
    startRow = 0
    # start before the first prediction date (the paper uses 196912)
    endRow = df.loc[df["Date"] == 196912].index[0]
    lastRow = df.loc[df["Date"] == 201612].index[0]
    periodR = pd.DataFrame(np.zeros(lastRow - endRow))  # start with the first predicting row
    dateIndex = pd.period_range(
        start=pd.Period(str(df.loc[endRow + 1, "Date"]), freq="M"),
        end=pd.Period(str(df.loc[lastRow, "Date"]), freq="M"),
        freq="M")
    indBetaList = [
        pd.DataFrame(np.zeros((lastRow - endRow, len(indNames))),
                     index=dateIndex, columns=indNames)
        for i in range(len(indNames))
    ]
    for e in range(endRow, lastRow):
        (yPred, betas) = OLSlassoRegression(df, endRow=e, mode="predict")
        yPred.sort_values(by=["yPred"], ascending=True, inplace=True)
        # print("yPred = ", yPred)
        # after sorting the predicted returns, long the top quintile and
        # short the bottom quintile
        bottomInd = yPred.iloc[:5, :].index  # find the industries
        topInd = yPred.iloc[-5:, :].index
        bottomR = df.loc[endRow + 1, bottomInd]  # get the realized returns
        topR = df.loc[endRow + 1, topInd]
        print(indBetaList[0].index[e - endRow],
              np.round(np.average(topR)),
              np.round(np.average(bottomR)),
              np.round(np.average(topR) - np.average(bottomR)))
        periodR.iloc[e - endRow, :] = np.mean(topR) - np.mean(bottomR)
        for i in range(len(indNames)):
            indBeta = indBetaList[i]
            indBeta.iloc[e - endRow, :] = betas.loc[betas.index[i], :]
    print(np.mean(periodR) * 12)
    # print(indBeta)

    # for i in range(len(indNames)):
    #     indBeta = indBetaList[i]
    #     writer = pd.ExcelWriter(indNames[i] + " betas over time.xlsx")
    #     indBeta.to_excel(writer, "Sheet1")
    #     writer.save()
    #     lineplot(indBeta.index, indBeta, "Date", "OLS post Lasso Coefficient",
    #              indNames[i] + " Betas Over Time")

    # indBeta = indBetaList[0]
    # writer = pd.ExcelWriter(indNames[0] + " betas over time.xlsx")
    # indBeta.to_excel(writer, "Sheet1")
    # writer.save()
    lineplot(indBeta.index, indBeta, "Date", "OLS post Lasso Coefficient",
             indNames[0] + " Betas Over Time")
        'X4_Mon_logerror6std', 'X4_Mon_logerror3mean', 'X1_Mon_logerror6mean']

x = train.yrmonth
train = train[cols]
y = joblib.load("../input/y.pkl")

#################################################
# Val Split
#################################################
# x = pd.read_csv('../input/train_2016_v2.csv')
# x["transactiondate"] = pd.to_datetime(x["transactiondate"])
# x["yrmonth"] = x["transactiondate"].apply(lambda x: x.strftime('%Y%m')).astype(int)
y_logit = x
valindex = y_logit > pd.Period('2017-05')
trainindex = y_logit <= pd.Period('2017-05')
valid = train[valindex]
yval = y[valindex]
# train = train[trainindex]
# y = y[trainindex]
#################################################

lbound = np.mean(y) - 3 * np.std(y)
ubound = np.mean(y) + 3 * np.std(y)

test = joblib.load("../input/teststat.pkl")
test = test[cols]
# test = valid.copy()
gc.collect()
    ('decimal', [Decimal(1), np.nan, Decimal(2)]),
    ('boolean', [True, np.nan, False]),
    ('datetime64', [np.datetime64('2013-01-01'), np.nan,
                    np.datetime64('2018-01-01')]),
    ('datetime', [pd.Timestamp('20130101'), np.nan, pd.Timestamp('20180101')]),
    ('date', [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
    # The following two dtypes are commented out due to GH 23554
    # ('complex', [1 + 1j, np.nan, 2 + 2j]),
    # ('timedelta64', [np.timedelta64(1, 'D'),
    #                  np.nan, np.timedelta64(2, 'D')]),
    ('timedelta', [timedelta(1), np.nan, timedelta(2)]),
    ('time', [time(1), np.nan, time(2)]),
    ('period', [pd.Period(2013), pd.NaT, pd.Period(2018)]),
    ('interval', [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)]),
]
ids, _ = zip(*_any_skipna_inferred_dtype)  # use inferred type as fixture-id


@pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids)
def any_skipna_inferred_dtype(request):
    """
    Fixture for all inferred dtypes from _libs.lib.infer_dtype

    The covered (inferred) types are:
    * 'string'
    * 'empty'
def test_pandas_period_index(self):
    v = self.cls(['x'], pd.period_range(start='2000', periods=20, freq='B'))
    self.assertEqual(v[0], pd.Period('2000', freq='B'))
    assert "Period('2000-01-03', 'B')" in repr(v)
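# A small sketch of what freq='B' means in the test above: a business-day
# period parsed from '2000' lands on the first weekday of that year, since
# 2000-01-01 was a Saturday.
import pandas as pd

print(pd.Period('2000', freq='B'))  # Period('2000-01-03', 'B')
print(pd.period_range(start='2000', periods=3, freq='B'))
# PeriodIndex(['2000-01-03', '2000-01-04', '2000-01-05'], ...)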
import itertools

import numpy as np
import pandas as pd
import pytest
from hamcrest import assert_that, none, not_none, calling, raises, close_to

import cifrum as lib
from conftest import decimal_places, delta
from cifrum._portfolio.currency import PortfolioCurrencyFactory
from cifrum.common.enums import Currency

__end_period = pd.Period('2018-12', freq='M')


@pytest.fixture
def pcf():
    return lib.obj_graph.provide(PortfolioCurrencyFactory)


@pytest.mark.parametrize(
    'currency, inflation_kind',
    itertools.product(Currency, ['values', 'cumulative', 'a_mean', 'g_mean']))
def test__exists_for_all_currencies(pcf: PortfolioCurrencyFactory,
                                    currency: Currency, inflation_kind: str):
    pc = pcf.new(currency=currency)
    infl = pc.inflation(kind=inflation_kind, end_period=__end_period, years_ago=4)
    assert_that(infl, not_none())
def net_time_period_to_pandas_period(net_time_period, freq):
    start_datetime = net_datetime_to_py_datetime(net_time_period.Start)
    return pd.Period(start_datetime, freq=freq)
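# Hedged usage sketch: pd.Period accepts a plain datetime plus a frequency,
# which is all the converter above relies on. The .NET-side types are
# assumptions here; a bare datetime stands in for the converted start value.
import datetime
import pandas as pd

start = datetime.datetime(2020, 7, 1, 12, 30)
print(pd.Period(start, freq='M'))  # Period('2020-07', 'M')
print(pd.Period(start, freq='D'))  # Period('2020-07-01', 'D')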
def export_dataset(db, dataset):
    """Export all series for one Dataset.

    Returns an array with one row per series.
    """
    # TODO: use a Redis queue instead, since this holds too much in RAM?

    start = time.time()

    headers = ['key'] + dataset['dimension_keys']
    # ['key', 'freq', 'geo', 'na_item', 'nace_r2', 'unit']

    # could these start at 0 and -1 instead?
    dmin = float('inf')
    dmax = -float('inf')

    query = {
        'provider_name': dataset['provider_name'],
        "dataset_code": dataset['dataset_code']
    }
    series_list = db[constants.COL_SERIES].find(query)

    for s in series_list:
        # collect the first and last dates found
        """
        This yields a date range from the oldest to the most recent date,
        since the individual series do not all cover the same dates.
        """
        if s['start_date'] < dmin:
            dmin = s['start_date']
        if s['end_date'] > dmax:
            dmax = s['end_date']
        freq = s['frequency']

    series_list.rewind()

    pDmin = pandas.Period(ordinal=dmin, freq=freq)
    pDmax = pandas.Period(ordinal=dmax, freq=freq)
    headers += list(pandas.period_range(pDmin, pDmax, freq=freq).to_native_types())
    # ['key', 'freq', 'geo', 'na_item', 'nace_r2', 'unit',
    #  '1995', '1996', '1997', ..., '2013', '2014']

    elements = [headers]
    series_list.rewind()

    def row_process(s):
        row = [s['key']]
        for c in dataset['dimension_keys']:
            if c in s['dimensions']:
                row.append(s['dimensions'][c])
            else:
                row.append('')
        p_start_date = pandas.Period(ordinal=s['start_date'], freq=freq)
        p_end_date = pandas.Period(ordinal=s['end_date'], freq=freq)

        """
        pDmin: the oldest pandas.Period()
        p_start_date - 1: the current period minus one
        >>> p_start_date - 1
        Period('1994', 'A-DEC')
        Bug: returns nothing if p_start_date - 1 becomes identical to pDmin
        """
        # The None entries stand for periods that have no corresponding value
        _row = [None for d in pandas.period_range(pDmin, p_start_date - 1, freq=freq)]
        row.extend(_row)

        _row = [val["value"] for val in s['values']]
        row.extend(_row)

        _row = [None for d in pandas.period_range(p_end_date + 1, pDmax, freq=freq)]
        row.extend(_row)

        return row

    for s in series_list:
        elements.append(row_process(s))

    end = time.time() - start
    logger.info("export_dataset - %s : %.3f" % (dataset['dataset_code'], end))

    return elements
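# Hedged sketch of the ordinal round-trip the exporter depends on:
# start_date/end_date are stored as Period ordinals, and
# pandas.Period(ordinal=..., freq=...) reconstructs the original period.
import pandas

p = pandas.Period('2000-01', freq='M')
print(p.ordinal)                                   # 360 -- months since 1970-01
print(pandas.Period(ordinal=p.ordinal, freq='M'))  # Period('2000-01', 'M')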
def test_is_period(self):
    self.assertTrue(lib.is_period(pd.Period('2011-01', freq='M')))
    self.assertFalse(lib.is_period(pd.PeriodIndex(['2011-01'], freq='M')))
    self.assertFalse(lib.is_period(pd.Timestamp('2011-01')))
    self.assertFalse(lib.is_period(1))
    self.assertFalse(lib.is_period(np.nan))
        marks=not_implemented_mark,
    ),
    pytest.param(
        (pd.DataFrame, frame_data, operator.methodcaller("quantile", q=[0.25, 0.75])),
        marks=not_implemented_mark,
    ),
    pytest.param(
        (pd.DataFrame, frame_data, operator.methodcaller("quantile")),
        marks=not_implemented_mark,
    ),
    (
        pd.DataFrame,
        ({"A": [1]}, [pd.Period("2000", "D")]),
        operator.methodcaller("to_timestamp"),
    ),
    (
        pd.DataFrame,
        ({"A": [1]}, [pd.Timestamp("2000")]),
        operator.methodcaller("to_period", freq="D"),
    ),
    pytest.param(
        (pd.DataFrame, frame_mi_data, operator.methodcaller("isin", [1])),
        marks=not_implemented_mark,
    ),
    pytest.param(
        (pd.DataFrame, frame_mi_data,
def test_count_nonnumeric_types(self):
    # GH12541
    cols = [
        "int",
        "float",
        "string",
        "datetime",
        "timedelta",
        "periods",
        "fl_inf",
        "fl_nan",
        "str_nan",
        "dt_nat",
        "periods_nat",
    ]

    df = DataFrame(
        {
            "int": [1, 2, 3],
            "float": [4.0, 5.0, 6.0],
            "string": list("abc"),
            "datetime": pd.date_range("20170101", periods=3),
            "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"),
            "periods": [
                pd.Period("2012-01"),
                pd.Period("2012-02"),
                pd.Period("2012-03"),
            ],
            "fl_inf": [1.0, 2.0, np.Inf],
            "fl_nan": [1.0, 2.0, np.NaN],
            "str_nan": ["aa", "bb", np.NaN],
            "dt_nat": [
                Timestamp("20170101"),
                Timestamp("20170203"),
                Timestamp(None),
            ],
            "periods_nat": [
                pd.Period("2012-01"),
                pd.Period("2012-02"),
                pd.Period(None),
            ],
        },
        columns=cols,
    )

    expected = DataFrame(
        {
            "int": [1.0, 2.0, 2.0],
            "float": [1.0, 2.0, 2.0],
            "string": [1.0, 2.0, 2.0],
            "datetime": [1.0, 2.0, 2.0],
            "timedelta": [1.0, 2.0, 2.0],
            "periods": [1.0, 2.0, 2.0],
            "fl_inf": [1.0, 2.0, 2.0],
            "fl_nan": [1.0, 2.0, 1.0],
            "str_nan": [1.0, 2.0, 1.0],
            "dt_nat": [1.0, 2.0, 1.0],
            "periods_nat": [1.0, 2.0, 1.0],
        },
        columns=cols,
    )

    result = df.rolling(window=2).count()
    tm.assert_frame_equal(result, expected)

    result = df.rolling(1).count()
    expected = df.notna().astype(float)
    tm.assert_frame_equal(result, expected)
def test_timestep_greater_than_zero_days():
    """Test trying to create a zero-length Timestep."""
    with pytest.raises(ValueError):
        # Test that setting days <= 0 raises an error
        Timestep(pandas.Period('2019-01-01', freq='D'), 0, 0)
def date_to_period(row, freq='D'):
    """
    'Date' is a string. Create a Period from it. The default frequency
    is daily ('D'), i.e. year, month and day.
    """
    date = row[common.DATE_COL]
    period = pd.Period(date, freq=freq)
    return period
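# A minimal usage sketch, assuming common.DATE_COL == 'Date'; the DataFrame
# below is illustrative, not from the original code.
import pandas as pd

df = pd.DataFrame({'Date': ['2021-03-01', '2021-03-02']})
daily = df.apply(date_to_period, axis=1)              # Period('2021-03-01', 'D'), ...
monthly = df.apply(date_to_period, axis=1, freq='M')  # Period('2021-03', 'M'), ...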
def dashboard_sourceData(request):
    if request.method == 'GET':
        json_out = {}
        main_out = {}
        days_num = 0
        data = {}
        try:
            # sourcedata
            days_list = []
            day = pd.Period(datetime.datetime.now(), freq='D')
            # logger.info(type(Topic.objects))
            # Topic.objects.all() and Topic.objects() return the same type
            topic_list = Topic.objects
            datatype_list = Datatype_name.objects
            today = date.today()
            post_7days = Post.objects(
                Q(pt_time__gte=datetime.datetime.combine(
                    today - datetime.timedelta(7), datetime.time.min)) &
                Q(pt_time__lte=datetime.datetime.combine(today, datetime.time.max)))
            while days_num < 7:
                day_str = day.strftime('%Y%m%d')
                day_change = today - datetime.timedelta(days=days_num)
                post = post_7days(
                    Q(pt_time__gte=datetime.datetime.combine(day_change, datetime.time.min)) &
                    Q(pt_time__lte=datetime.datetime.combine(day_change, datetime.time.max)))
                for topic in topic_list:
                    for data in datatype_list:
                        day_dict = {}
                        day_dict['time'] = day_str
                        day_dict['topic_id'] = topic._id
                        day_dict['topic_name'] = topic.topic_name
                        day_dict['dataTypeName'] = data.datatype_name
                        day_dict['data_type'] = data.data_type
                        post_datatype = post(Q(data_type=data.data_type) & Q(topic_id=topic._id))
                        # logger.info('post_num = ' + str(len(post_datatype)))
                        day_dict['post_num'] = post_datatype.count()
                        days_list.append(day_dict)
                for data in datatype_list:
                    day_dict = {}
                    day_dict['time'] = day_str
                    day_dict['topic_id'] = 0
                    day_dict['topic_name'] = ''
                    day_dict['dataTypeName'] = data.datatype_name
                    day_dict['data_type'] = data.data_type
                    post_datatype = post(Q(data_type=data.data_type) & Q(topic_id=0))
                    # logger.info('post_num = ' + str(len(post_datatype)))
                    day_dict['post_num'] = post_datatype.count()
                    days_list.append(day_dict)
                day -= 1
                days_num += 1

            # Hot: all hot posts
            hot_dict = {}
            hot_posts = post_7days(
                Q(topic_id__ne=0) & (Q(data_type=3) | Q(data_type=2))).order_by(
                    '-comm_num')[:10].only(
                        "_id", "url", "board", "title", "content", "pt_time",
                        "img_url", "poster")
            # hot_poster = post_7days.only('poster').all()
            # logger.info("hot_poster = " + str(hot_poster.count()))
            hot_weibo = post_7days(
                Q(topic_id__ne=0) & Q(data_type=2)).order_by(
                    '-comm_num')[:10].only(
                        "_id", "url", "board", "title", "content", "pt_time", "img_url")
            hot_dict['hotPost'] = handle_post_list(hot_posts)
            hot_dict['hotPoster'] = handle_poster_list(hot_posts)
            hot_dict['hotWeibo'] = handle_post_list(hot_weibo)

            # wordlist = []
            # wordres = Cloud_formain.objects.only("word", "frequency")
            # for worditem in wordres:
            #     temp = {}
            #     temp['word'] = worditem.word
            #     temp['weight'] = worditem.frequency
            #     wordlist.append(temp)

            # map data
            mapData_list = [
                {'id': '001', 'pro': "陕西", 'nums': 52},
                {'id': '002', 'pro': "北京", 'nums': 100},
                {'id': '003', 'pro': "上海", 'nums': 60},
                {'id': '004', 'pro': "杭州", 'nums': 48},
                {'id': '005', 'pro': "南京", 'nums': 50},
            ]

            main_out['mapData'] = mapData_list
            main_out['sourceData'] = days_list
            main_out['Hot'] = hot_dict
            # main_out['word_cloud'] = wordlist
            json_out['code'] = 0
            json_out['success'] = True
            json_out['data'] = main_out
        except:
            traceback.print_exc()
            json_out['code'] = 1
            json_out['data'] = {}
            json_out['success'] = False
        return HttpResponse(json.dumps(json_out, cls=MyEncoder), content_type="application/json")
def _to_timestamp(year):
    return pd.Period(year=year, freq='Y').to_timestamp()
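# Hedged note: Period.to_timestamp() defaults to the start of the period,
# so the helper above maps a year to January 1st of that year.
import pandas as pd

assert pd.Period(year=2020, freq='Y').to_timestamp() == pd.Timestamp('2020-01-01')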
class TestDataFrameAppend: @pytest.mark.filterwarnings( "ignore:.*append method is deprecated.*:FutureWarning") def test_append_multiindex(self, multiindex_dataframe_random_data, frame_or_series): obj = multiindex_dataframe_random_data obj = tm.get_obj(obj, frame_or_series) a = obj[:5] b = obj[5:] result = a.append(b) tm.assert_equal(result, obj) def test_append_empty_list(self): # GH 28769 df = DataFrame() result = df._append([]) expected = df tm.assert_frame_equal(result, expected) assert result is not df df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) result = df._append([]) expected = df tm.assert_frame_equal(result, expected) assert result is not df # ._append() should return a new object def test_append_series_dict(self): df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) series = df.loc[4] msg = "Indexes have overlapping values" with pytest.raises(ValueError, match=msg): df._append(series, verify_integrity=True) series.name = None msg = "Can only append a Series if ignore_index=True" with pytest.raises(TypeError, match=msg): df._append(series, verify_integrity=True) result = df._append(series[::-1], ignore_index=True) expected = df._append(DataFrame({ 0: series[::-1] }, index=df.columns).T, ignore_index=True) tm.assert_frame_equal(result, expected) # dict result = df._append(series.to_dict(), ignore_index=True) tm.assert_frame_equal(result, expected) result = df._append(series[::-1][:3], ignore_index=True) expected = df._append(DataFrame({ 0: series[::-1][:3] }).T, ignore_index=True, sort=True) tm.assert_frame_equal(result, expected.loc[:, result.columns]) msg = "Can only append a dict if ignore_index=True" with pytest.raises(TypeError, match=msg): df._append(series.to_dict()) # can append when name set row = df.loc[4] row.name = 5 result = df._append(row) expected = df._append(df[-1:], ignore_index=True) tm.assert_frame_equal(result, expected) def test_append_list_of_series_dicts(self): df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) dicts = [x.to_dict() for idx, x in df.iterrows()] result = df._append(dicts, ignore_index=True) expected = df._append(df, ignore_index=True) tm.assert_frame_equal(result, expected) # different columns dicts = [ { "foo": 1, "bar": 2, "baz": 3, "peekaboo": 4 }, { "foo": 5, "bar": 6, "baz": 7, "peekaboo": 8 }, ] result = df._append(dicts, ignore_index=True, sort=True) expected = df._append(DataFrame(dicts), ignore_index=True, sort=True) tm.assert_frame_equal(result, expected) def test_append_list_retain_index_name(self): df = DataFrame([[1, 2], [3, 4]], index=pd.Index(["a", "b"], name="keepthisname")) serc = Series([5, 6], name="c") expected = DataFrame( [[1, 2], [3, 4], [5, 6]], index=pd.Index(["a", "b", "c"], name="keepthisname"), ) # append series result = df._append(serc) tm.assert_frame_equal(result, expected) # append list of series result = df._append([serc]) tm.assert_frame_equal(result, expected) def test_append_missing_cols(self): # GH22252 # exercise the conditional branch in append method where the data # to be appended is a list and does not contain all columns that are in # the target DataFrame df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) dicts = [{"foo": 9}, {"bar": 10}] result = df._append(dicts, ignore_index=True, sort=True) expected = df._append(DataFrame(dicts), ignore_index=True, sort=True) tm.assert_frame_equal(result, expected) def test_append_empty_dataframe(self): # Empty df append empty df df1 = DataFrame() df2 = 
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Non-empty df append empty df
        df1 = DataFrame(np.random.randn(5, 2))
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Empty df with columns append empty df
        df1 = DataFrame(columns=["bar", "foo"])
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        # Non-empty df with columns append empty df
        df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"])
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

    def test_append_dtypes(self):
        # GH 5754
        # row appends of different dtypes (so need to do by-item)
        # can sometimes infer the correct type

        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5))
        df2 = DataFrame()
        result = df1._append(df2)
        expected = df1.copy()
        tm.assert_frame_equal(result, expected)

        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": "foo"}, index=range(1, 2))
        result = df1._append(df2)
        expected = DataFrame({"bar": [Timestamp("20130101"), "foo"]})
        tm.assert_frame_equal(result, expected)

        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": np.nan}, index=range(1, 2))
        result = df1._append(df2)
        expected = DataFrame(
            {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")}
        )
        expected = expected.astype(object)
        tm.assert_frame_equal(result, expected)

        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object)
        result = df1._append(df2)
        expected = DataFrame(
            {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")}
        )
        expected = expected.astype(object)
        tm.assert_frame_equal(result, expected)

        df1 = DataFrame({"bar": np.nan}, index=range(1))
        df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2))
        result = df1._append(df2)
        expected = DataFrame(
            {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")}
        )
        expected = expected.astype(object)
        tm.assert_frame_equal(result, expected)

        df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1))
        df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object)
        result = df1._append(df2)
        expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])})
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "timestamp", ["2019-07-19 07:04:57+0100", "2019-07-19 07:04:57"]
    )
    def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp):
        # GH 30238
        tz = tz_naive_fixture
        df = DataFrame([Timestamp(timestamp, tz=tz)])
        result = df._append(df.iloc[0]).iloc[-1]
        expected = Series(Timestamp(timestamp, tz=tz), name=0)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "data, dtype",
        [
            ([1], pd.Int64Dtype()),
            ([1], pd.CategoricalDtype()),
            ([pd.Interval(left=0, right=5)], pd.IntervalDtype()),
            ([pd.Period("2000-03", freq="M")], pd.PeriodDtype("M")),
            ([1], pd.SparseDtype()),
        ],
    )
    def test_other_dtypes(self, data, dtype):
        df = DataFrame(data, dtype=dtype)
        result = df._append(df.iloc[0]).iloc[-1]
        expected = Series(data, name=0, dtype=dtype)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
    def test_append_numpy_bug_1681(self, dtype):
        # another datetime64 bug
        if dtype == "datetime64[ns]":
            index = date_range("2011/1/1", "2012/1/1", freq="W-FRI")
        else:
            index = timedelta_range("1 days", "10 days", freq="2D")

        df = DataFrame()
        other = DataFrame({"A": "foo", "B": index}, index=index)

        result = df._append(other)
        assert (result["B"] == index).all()

    @pytest.mark.filterwarnings("ignore:The values in the array:RuntimeWarning")
    def test_multiindex_column_append_multiple(self):
        # GH 29699
        df = DataFrame(
            [[1, 11], [2, 12], [3, 13]],
            columns=pd.MultiIndex.from_tuples(
                [("multi", "col1"), ("multi", "col2")], names=["level1", None]
            ),
        )
        df2 = df.copy()
        for i in range(1, 10):
            df[i, "colA"] = 10
            df = df._append(df2, ignore_index=True)
            result = df["multi"]
            expected = DataFrame(
                {"col1": [1, 2, 3] * (i + 1), "col2": [11, 12, 13] * (i + 1)}
            )
            tm.assert_frame_equal(result, expected)

    def test_append_raises_future_warning(self):
        # GH#35407
        df1 = DataFrame([[1, 2], [3, 4]])
        df2 = DataFrame([[5, 6], [7, 8]])
        with tm.assert_produces_warning(FutureWarning):
            df1.append(df2)
import numpy as np
import pandas as pd
from hamcrest import assert_that, close_to, calling, raises

import yapo as y
from conftest import decimal_places, delta
from yapo._settings import _MONTHS_PER_YEAR
from yapo.common.time_series import TimeSeriesKind

__asset_name = 'mut_ru/0890-94127385'
__portfolio_period_start = pd.Period('2011-1', freq='M')
__portfolio_period_end = pd.Period('2017-2', freq='M')
__asset = y.portfolio_asset(name=__asset_name,
                            start_period=str(__portfolio_period_start),
                            end_period=str(__portfolio_period_end),
                            currency='USD')


def test__cumulative_get_return():
    arors = __asset.get_return(kind='cumulative').values
    assert_that(arors.max(), close_to(.0924, delta))
    assert_that(arors.min(), close_to(-.5464, delta))

    arors_real = __asset.get_return(kind='cumulative', real=True).values
    assert_that(arors_real.max(), close_to(.0765, delta))
    assert_that(arors_real.min(), close_to(-.5725, delta))


def test__ytd_get_return():
    ror_ytd = __asset.get_return(kind='ytd')
    assert ror_ytd.start_period == pd.Period('2012-1', freq='M')
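# For readers unfamiliar with the 'cumulative' kind asserted above, a
# generic pandas sketch (not yapo's implementation) of compounding
# monthly rates of return into a cumulative return series:
import pandas as pd

# monthly rates of return, indexed by Period (illustrative numbers)
ror = pd.Series([0.02, -0.01, 0.03],
                index=pd.period_range('2011-01', periods=3, freq='M'))

# cumulative return: compound the monthly growth factors, then subtract 1
cumulative = (1 + ror).cumprod() - 1
print(cumulative)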
    (
        [1, 2],
        np.dtype("float32"),
        PandasArray(np.array([1.0, 2.0], dtype=np.dtype("float32"))),
    ),
    (np.array([1, 2], dtype="int64"), None, IntegerArray._from_sequence([1, 2])),
    (
        np.array([1.0, 2.0], dtype="float64"),
        None,
        FloatingArray._from_sequence([1.0, 2.0]),
    ),
    # String alias passes through to NumPy
    ([1, 2], "float32", PandasArray(np.array([1, 2], dtype="float32"))),
    # Period alias
    (
        [pd.Period("2000", "D"), pd.Period("2001", "D")],
        "Period[D]",
        period_array(["2000", "2001"], freq="D"),
    ),
    # Period dtype
    (
        [pd.Period("2000", "D")],
        pd.PeriodDtype("D"),
        period_array(["2000"], freq="D"),
    ),
    # Datetime (naive)
    (
        [1, 2],
        np.dtype("datetime64[ns]"),
        DatetimeArray._from_sequence(
class TestInsertIndexCoercion(CoercionBase):

    klasses = ['index']
    method = 'insert'

    def _assert_insert_conversion(self, original, value,
                                  expected, expected_dtype):
        """ test coercion triggered by insert """
        target = original.copy()
        res = target.insert(1, value)
        tm.assert_index_equal(res, expected)
        assert res.dtype == expected_dtype

    @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
        (1, 1, np.object),
        (1.1, 1.1, np.object),
        (False, False, np.object),
        ('x', 'x', np.object)])
    def test_insert_index_object(self, insert, coerced_val, coerced_dtype):
        obj = pd.Index(list('abcd'))
        assert obj.dtype == np.object

        exp = pd.Index(['a', coerced_val, 'b', 'c', 'd'])
        self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

    @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
        (1, 1, np.int64),
        (1.1, 1.1, np.float64),
        (False, 0, np.int64),
        ('x', 'x', np.object)])
    def test_insert_index_int64(self, insert, coerced_val, coerced_dtype):
        obj = pd.Int64Index([1, 2, 3, 4])
        assert obj.dtype == np.int64

        exp = pd.Index([1, coerced_val, 2, 3, 4])
        self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

    @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
        (1, 1., np.float64),
        (1.1, 1.1, np.float64),
        (False, 0., np.float64),
        ('x', 'x', np.object)])
    def test_insert_index_float64(self, insert, coerced_val, coerced_dtype):
        obj = pd.Float64Index([1., 2., 3., 4.])
        assert obj.dtype == np.float64

        exp = pd.Index([1., coerced_val, 2., 3., 4.])
        self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

    @pytest.mark.parametrize('fill_val,exp_dtype', [
        (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
        (pd.Timestamp('2012-01-01', tz='US/Eastern'),
         'datetime64[ns, US/Eastern]')],
        ids=['datetime64', 'datetime64tz'])
    def test_insert_index_datetimes(self, fill_val, exp_dtype):
        obj = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03',
                                '2011-01-04'], tz=fill_val.tz)
        assert obj.dtype == exp_dtype

        exp = pd.DatetimeIndex(['2011-01-01', fill_val.date(), '2011-01-02',
                                '2011-01-03', '2011-01-04'], tz=fill_val.tz)
        self._assert_insert_conversion(obj, fill_val, exp, exp_dtype)

        msg = "Passed item and index have different timezone"
        if fill_val.tz:
            with tm.assert_raises_regex(ValueError, msg):
                obj.insert(1, pd.Timestamp('2012-01-01'))

        with tm.assert_raises_regex(ValueError, msg):
            obj.insert(1, pd.Timestamp('2012-01-01', tz='Asia/Tokyo'))

        msg = "cannot insert DatetimeIndex with incompatible label"
        with tm.assert_raises_regex(TypeError, msg):
            obj.insert(1, 1)

        pytest.xfail("ToDo: must coerce to object")

    def test_insert_index_timedelta64(self):
        obj = pd.TimedeltaIndex(['1 day', '2 day', '3 day', '4 day'])
        assert obj.dtype == 'timedelta64[ns]'

        # timedelta64 + timedelta64 => timedelta64
        exp = pd.TimedeltaIndex(['1 day', '10 day', '2 day', '3 day', '4 day'])
        self._assert_insert_conversion(obj, pd.Timedelta('10 day'),
                                       exp, 'timedelta64[ns]')

        # ToDo: must coerce to object
        msg = "cannot insert TimedeltaIndex with incompatible label"
        with tm.assert_raises_regex(TypeError, msg):
            obj.insert(1, pd.Timestamp('2012-01-01'))

        # ToDo: must coerce to object
        msg = "cannot insert TimedeltaIndex with incompatible label"
        with tm.assert_raises_regex(TypeError, msg):
            obj.insert(1, 1)

    @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
        (pd.Period('2012-01', freq='M'), '2012-01', 'period[M]'),
        (pd.Timestamp('2012-01-01'), pd.Timestamp('2012-01-01'), np.object),
        (1, 1, np.object),
        ('x', 'x', np.object)])
    def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
        obj = pd.PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
                             freq='M')
        assert obj.dtype == 'period[M]'

        if isinstance(insert, pd.Period):
            index_type = pd.PeriodIndex
        else:
            index_type = pd.Index

        exp = index_type([pd.Period('2011-01', freq='M'),
                          coerced_val,
                          pd.Period('2011-02', freq='M'),
                          pd.Period('2011-03', freq='M'),
                          pd.Period('2011-04', freq='M')], freq='M')
        self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

    def test_insert_index_complex128(self):
        pass

    def test_insert_index_bool(self):
        pass
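# The coercion rules pinned down above are easy to check interactively. A
# minimal sketch on current pandas (note: pd.Int64Index and np.object from
# the old tests above have since been removed in favor of plain pd.Index
# and object):
import pandas as pd

idx = pd.Index([1, 2, 3, 4])        # int64 index
print(idx.insert(1, 1.1).dtype)     # float64: a float value forces an upcast
print(idx.insert(1, 'x').dtype)     # object: mixed types fall back to object

pidx = pd.period_range('2011-01', periods=4, freq='M')
print(pidx.insert(1, pd.Period('2012-01', freq='M')).dtype)  # stays period[M]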
####################################################################
# basic date parse (string to date):
df = pd.read_csv("file.csv", parse_dates=["column_name"])

# Use the DATE column as INDEX (first convert the values to dates with parse_dates)
df = pd.read_csv("file.csv", parse_dates=["column_name"], index_col="column_name")

# now you can use a partial index to retrieve all of a month etc.
df.loc["2020-01"]
# can also add metrics:
df.loc["2020-01"].mean()
# can use a range of dates:
df.loc['2020-01-01':'2020-01-20']

# Define datetime periods:
m = pd.Period('2020-1', freq='M')
# print out the start time / end time:
m.start_time
m.end_time
# operations: m + 1 (advances to February)

# TIMEZONES:
from pytz import all_timezones
print(all_timezones)  # this will show all the timezones available

# python has 2 kinds of datetime objects: 'naive' (no timezone) and 'timezone-aware'
# assign a timezone to the date index:
df = df.tz_localize(tz='US/Eastern')
df.index  # now it will show the timezone with UTC-4

# Convert to Berlin:
df = df.tz_convert(tz='Europe/Berlin')
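# Pulled together, the notes above fit in one runnable sketch; the CSV
# content is made up and io.StringIO stands in for a real file:
import io
import pandas as pd

csv_data = "day,price\n2020-01-01,10\n2020-01-02,11\n2020-02-01,12\n"

# parse the date column and use it as the index
df = pd.read_csv(io.StringIO(csv_data), parse_dates=["day"], index_col="day")

print(df.loc["2020-01"].mean())           # partial-string indexing: all of January
print(df.loc["2020-01-01":"2020-01-02"])  # slice by a range of dates

m = pd.Period("2020-01", freq="M")
print(m.start_time, m.end_time)           # month boundaries as Timestamps
print(m + 1)                              # 2020-02

# naive index -> timezone-aware, then convert
df = df.tz_localize("US/Eastern").tz_convert("Europe/Berlin")
print(df.index.tz)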
def test_isscalar_pandas_scalars(self):
    self.assertTrue(isscalar(pd.Timestamp('2014-01-01')))
    self.assertTrue(isscalar(pd.Timedelta(hours=1)))
    self.assertTrue(isscalar(pd.Period('2014-01-01')))
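# The isscalar tested here is the old internal helper; current pandas
# exposes the same check publicly as pandas.api.types.is_scalar. A quick
# sketch with the public API:
import pandas as pd
from pandas.api.types import is_scalar

# Timestamp, Timedelta and Period all count as scalars
assert is_scalar(pd.Timestamp('2014-01-01'))
assert is_scalar(pd.Timedelta(hours=1))
assert is_scalar(pd.Period('2014-01-01'))
assert not is_scalar([1, 2, 3])  # containers are not scalars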
# <a id="371"></a> <br> # ### 3-7-1 Timestamp # In[ ]: pd.Timestamp('9/1/2016 10:05AM') # <a id="372"></a> <br> # ### 3-7-2 Period # In[ ]: pd.Period('1/2016') # In[ ]: pd.Period('3/5/2016') # <a id="373"></a> <br> # ### 3-7-3 DatetimeIndex # In[ ]: t1 = pd.Series(list('abc'), [pd.Timestamp('2016-09-01'), pd.Timestamp('2016-09-02'), pd.Timestamp('2016-09-03')]) t1
("floating", [1.0, np.nan, 2.0]), ("integer", [1, np.nan, 2]), ("mixed-integer-float", [1, np.nan, 2.0]), ("decimal", [Decimal(1), np.nan, Decimal(2)]), ("boolean", [True, np.nan, False]), ("boolean", [True, pd.NA, False]), ("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]), ("datetime", [pd.Timestamp("20130101"), np.nan, pd.Timestamp("20180101")]), ("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]), # The following two dtypes are commented out due to GH 23554 # ('complex', [1 + 1j, np.nan, 2 + 2j]), # ('timedelta64', [np.timedelta64(1, 'D'), # np.nan, np.timedelta64(2, 'D')]), ("timedelta", [timedelta(1), np.nan, timedelta(2)]), ("time", [time(1), np.nan, time(2)]), ("period", [pd.Period(2013), pd.NaT, pd.Period(2018)]), ("interval", [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)]), ] ids, _ = zip(*_any_skipna_inferred_dtype) # use inferred type as fixture-id @pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids) def any_skipna_inferred_dtype(request): """ Fixture for all inferred dtypes from _libs.lib.infer_dtype The covered (inferred) types are: * 'string' * 'empty' * 'bytes' * 'mixed'