def fit_ls_to_EIregret_sub(df, wid, subseries_fname, savefolder, force_run_all=False):
    """Fit one subject's length-scale and pickle the result as a Series.

    The output file is ``savefolder + subseries_fname + wid + '.pkl'``.  If
    that file already exists the fit is skipped, unless ``force_run_all`` is
    true.  Progress messages go to stdout, which is flushed before returning.

    Single-argument ``print(...)`` calls are used because they behave the
    same as the print statement on Python 2.
    """
    started = time()
    print('begin ' + wid)
    sys.stdout.flush()

    out_path = savefolder + subseries_fname + wid + '.pkl'
    if isfile(out_path) and not force_run_all:
        print(wid + 'already run (pass force_run_all=True to force rerun)')
        sys.stdout.flush()
        return

    print(str(wid) + ' fitting now...')
    # fit subject length-scale
    fit_result = fit_sub(df, wid)

    print('converting to series...')
    fit_series = Series(fit_result)
    # make names more sensible
    # FIXME: 'fun' is no longer an sse; should be cumulative regret
    fit_series.rename({'fun': 'sse', 'x': 'fit_ls'}, inplace=True)
    fit_series['workerid'] = wid

    print('saving...')
    fit_series.to_pickle(out_path)
    finished = time()
    print(wid + ' saved successfully in ' + str(finished - started) + ' seconds')
    sys.stdout.flush()
def test_rename_set_name_inplace(self):
    """In-place ``rename`` with a scalar or tuple sets the name, not the index."""
    ser = Series(range(3), index=list('abc'))
    candidate_names = ['foo', 123, 123., datetime(2001, 11, 11), ('foo',)]
    for candidate in candidate_names:
        ser.rename(candidate, inplace=True)
        self.assertEqual(ser.name, candidate)
        # the index labels must be left alone
        unchanged_labels = np.array(['a', 'b', 'c'])
        self.assert_numpy_array_equal(ser.index.values, unchanged_labels)
def test_rename_set_name_inplace(self): s = Series(range(3), index=list('abc')) for name in ['foo', 123, 123., datetime(2001, 11, 11), ('foo',)]: s.rename(name, inplace=True) assert s.name == name exp = np.array(['a', 'b', 'c'], dtype=np.object_) tm.assert_numpy_array_equal(s.index.values, exp)
def test_rename():
    """Passing ``str`` as the renamer behaves like an equivalent lambda."""
    # GH 17407
    named_index = pd.Index(range(2, 7), name='IntIndex')
    ser = Series(range(1, 6), index=named_index)

    via_builtin = ser.rename(str)
    via_lambda = ser.rename(lambda label: str(label))

    assert_series_equal(via_builtin, via_lambda)
    assert via_builtin.name == via_lambda.name
def test_rename_set_name_inplace(self):
    """In-place ``rename`` with a scalar or tuple sets the name, not the index."""
    ser = Series(range(3), index=list("abc"))
    candidate_names = ("foo", 123, 123.0, datetime(2001, 11, 11), ("foo",))
    for candidate in candidate_names:
        ser.rename(candidate, inplace=True)
        self.assertEqual(ser.name, candidate)

        # the index labels must be left alone
        unchanged_labels = np.array(["a", "b", "c"], dtype=np.object_)
        self.assert_numpy_array_equal(ser.index.values, unchanged_labels)
def test_rename_set_name(self): s = Series(range(4), index=list('abcd')) for name in ['foo', 123, 123., datetime(2001, 11, 11), ('foo',)]: result = s.rename(name) assert result.name == name tm.assert_numpy_array_equal(result.index.values, s.index.values) assert s.name is None
def test_rename_set_name(self):
    """``rename`` with a scalar or tuple returns a renamed copy; the source stays unnamed."""
    source = Series(range(4), index=list("abcd"))
    candidate_names = ("foo", 123, 123.0, datetime(2001, 11, 11), ("foo",))
    for candidate in candidate_names:
        renamed = source.rename(candidate)
        self.assertEqual(renamed.name, candidate)
        # index is carried over untouched
        self.assert_numpy_array_equal(renamed.index.values, source.index.values)
        # the original must not have been modified
        self.assertTrue(source.name is None)
def test_rename_set_name(self):
    """``rename`` with a string, list or tuple returns a renamed copy; the source stays unnamed."""
    source = Series(range(4), index=list('abcd'))
    candidate_names = ('foo', ['foo'], ('foo',))
    for candidate in candidate_names:
        renamed = source.rename(candidate)
        self.assertEqual(renamed.name, candidate)
        # index is carried over untouched
        self.assert_numpy_array_equal(renamed.index.values, source.index.values)
        # the original must not have been modified
        self.assertTrue(source.name is None)
def test_rename(self):
    """Exercise ``Series.rename`` with a function, a full dict, a partial dict and an empty dict."""

    def to_stamp(label):
        return label.strftime("%Y%m%d")

    # function renamer
    by_function = self.ts.rename(to_stamp)
    self.assertEqual(by_function.index[0], to_stamp(self.ts.index[0]))

    # dict covering every label must give the same result as the function
    full_mapping = dict(zip(self.ts.index, by_function.index))
    by_dict = self.ts.rename(full_mapping)
    assert_series_equal(by_function, by_dict)

    # partial dict: unmapped labels are kept
    letters = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64")
    partly_renamed = letters.rename({"b": "foo", "d": "bar"})
    self.assert_numpy_array_equal(partly_renamed.index, ["a", "foo", "c", "bar"])

    # index with name: the index name survives a rename
    named_index = Index(["a", "b", "c", "d"], name="name")
    with_named_index = Series(np.arange(4), index=named_index, dtype="int64")
    after_noop = with_named_index.rename({})
    self.assertEqual(after_noop.index.name, with_named_index.index.name)
def test_groupby_multi_categorical_as_index(self):
    """Group by a categorical plus a second key with ``as_index=False``.

    Covers an in-axis column, a function grouper, a Series grouper, a
    name clash between groupers, and whether a named original index is
    dropped from the result.
    """
    # GH13204

    def expected_frame(a_values):
        # every (cat, A) combination appears; only 'A' differs between cases
        return DataFrame({'cat': Categorical([1, 1, 2, 2, 3, 3]),
                          'A': a_values,
                          'B': [101.0, nan, nan, 205.0, nan, nan]},
                         columns=['cat', 'A', 'B'])

    df = DataFrame({'cat': Categorical([1, 2, 2], [1, 2, 3]),
                    'A': [10, 11, 11],
                    'B': [101, 102, 103]})

    by_column = df.groupby(['cat', 'A'], as_index=False).sum()
    tm.assert_frame_equal(by_column,
                          expected_frame([10, 11, 10, 11, 10, 11]))

    # function grouper
    a_of_row = lambda r: df.loc[r, 'A']
    by_function = df.groupby(['cat', a_of_row], as_index=False).sum()
    tm.assert_frame_equal(by_function,
                          expected_frame([10.0, nan, nan, 22.0, nan, nan]))

    # another not in-axis grouper
    labels = Series(['a', 'b', 'b'], name='cat2')
    by_series = df.groupby(['cat', labels], as_index=False).sum()
    tm.assert_frame_equal(by_series,
                          expected_frame([10.0, nan, nan, 22.0, nan, nan]))

    # GH18872: conflicting names in desired index
    with pytest.raises(ValueError):
        df.groupby(['cat', labels.rename('cat')]).sum()

    # is original index dropped?
    group_columns = ['cat', 'A']
    expected = expected_frame([10, 11, 10, 11, 10, 11])
    for index_name in [None, 'X', 'B', 'cat']:
        df.index = Index(list("abc"), name=index_name)

        name_clash = (index_name in group_columns
                      and index_name in df.index.names)
        if name_clash:
            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False):
                result = df.groupby(group_columns, as_index=False).sum()
        else:
            result = df.groupby(group_columns, as_index=False).sum()

        tm.assert_frame_equal(result, expected, check_index_type=True)
def test_rename(self):
    """Exercise ``Series.rename`` with a function, a full dict, a partial dict and an empty dict."""

    def to_stamp(label):
        return label.strftime('%Y%m%d')

    # function renamer
    by_function = self.ts.rename(to_stamp)
    self.assertEqual(by_function.index[0], to_stamp(self.ts.index[0]))

    # dict covering every label must give the same result as the function
    full_mapping = dict(zip(self.ts.index, by_function.index))
    by_dict = self.ts.rename(full_mapping)
    assert_series_equal(by_function, by_dict)

    # partial dict: unmapped labels are kept
    letters = Series(np.arange(4), index=['a', 'b', 'c', 'd'], dtype='int64')
    partly_renamed = letters.rename({'b': 'foo', 'd': 'bar'})
    self.assert_index_equal(partly_renamed.index,
                            Index(['a', 'foo', 'c', 'bar']))

    # index with name: the index name survives a rename
    named_index = Index(['a', 'b', 'c', 'd'], name='name')
    with_named_index = Series(np.arange(4), index=named_index, dtype='int64')
    after_noop = with_named_index.rename({})
    self.assertEqual(after_noop.index.name, with_named_index.index.name)
def test_rename(self):
    """Exercise ``Series.rename`` with a function, a full dict, a partial dict and an empty dict."""

    def to_stamp(label):
        return label.strftime('%Y%m%d')

    # function renamer
    by_function = self.ts.rename(to_stamp)
    assert by_function.index[0] == to_stamp(self.ts.index[0])

    # dict covering every label must give the same result as the function
    full_mapping = dict(zip(self.ts.index, by_function.index))
    by_dict = self.ts.rename(full_mapping)
    assert_series_equal(by_function, by_dict)

    # partial dict: unmapped labels are kept
    letters = Series(np.arange(4), index=['a', 'b', 'c', 'd'], dtype='int64')
    partly_renamed = letters.rename({'b': 'foo', 'd': 'bar'})
    tm.assert_index_equal(partly_renamed.index,
                          Index(['a', 'foo', 'c', 'bar']))

    # index with name: the index name survives a rename
    named_index = Index(['a', 'b', 'c', 'd'], name='name')
    with_named_index = Series(np.arange(4), index=named_index, dtype='int64')
    after_noop = with_named_index.rename({})
    assert after_noop.index.name == with_named_index.index.name
def test_rename_axis_supported(self):
    """``Series.rename`` accepts ``axis`` 0 / 'index' and rejects anything else."""
    # Supporting axis for compatibility, detailed in GH-18589
    s = Series(range(5))
    s.rename({}, axis=0)
    s.rename({}, axis='index')
    # pytest.raises(..., match=...) is used instead of the pandas-private
    # tm.assert_raises_regex helper, which was deprecated and later removed.
    with pytest.raises(ValueError, match='No axis named 5'):
        s.rename({}, axis=5)
def test_rename_axis_supported(self):
    """``Series.rename`` accepts ``axis`` 0 / "index" and rejects anything else."""
    # Supporting axis for compatibility, detailed in GH-18589
    ser = Series(range(5))
    for valid_axis in (0, "index"):
        ser.rename({}, axis=valid_axis)

    expected_message = "No axis named 5"
    with pytest.raises(ValueError, match=expected_message):
        ser.rename({}, axis=5)
def test_rename_axis_supported(self):
    """``Series.rename`` accepts ``axis`` 0 / 'index' and rejects anything else."""
    # Supporting axis for compatibility, detailed in GH-18589
    ser = Series(range(5))
    for valid_axis in (0, 'index'):
        ser.rename({}, axis=valid_axis)

    expected_message = 'No axis named 5'
    with pytest.raises(ValueError, match=expected_message):
        ser.rename({}, axis=5)
def plot_quantile_accuracy_heatmap(actual: pd.Series,
                                   prediction: pd.Series,
                                   bins: int = 10,
                                   normalize: bool = True,
                                   color_map: str = 'Oranges',
                                   verbose: bool = True) -> None:
    """Plots the accuracy heatmap with binned actuals/predictions into quantiles.

    The accuracy of a regression model can be evaluated by visualizing the
    heatmap of a confusion matrix built from the binned values of actual and
    predicted results. This is particularly useful to roughly understand how
    accurate your model is, rather than relying on purely numeric regression
    performance metrics (e.g. RMSE, MAE or MAPE).

    Args:
      actual: Series with the actual values.
      prediction: Series with prediction values.
      bins: Number of quantile bins to create.
      normalize: Normalizes the heatmap values to percentages. The percentage
        in each block of the heatmap represents the accuracy of the model
        within the same quantile (each row will add up to one).
      color_map: Matplotlib colormap to use in the heatmap. Full list
        available at
        https://matplotlib.org/3.1.0/tutorials/colors/colormaps.html.
      verbose: If True, will print the accuracy of the 1st and 2nd bins.
    """
    actual = actual.rename('actual')
    prediction = prediction.rename('prediction')

    # 1-based quantile bin number of every observation
    prediction_bins = pd.qcut(prediction, bins, labels=False) + 1
    actual_bins = pd.qcut(actual, bins, labels=False) + 1

    normalization = 'index' if normalize else False
    heatmap = pd.crosstab(index=prediction_bins,
                          columns=actual_bins,
                          normalize=normalization).fillna(0)

    sns.heatmap(heatmap.round(2), annot=True, fmt='g', cmap=color_map)

    if not verbose:
        return

    first_quantile, second_quantile = _compute_quantile_accuracies(heatmap)
    print(f'{first_quantile*100:.2f}% accuracy within 1st quantile')
    print(f'{second_quantile*100:.2f}% accuracy within 2nd quantile')
def test_rename(self, datetime_series): ts = datetime_series renamer = lambda x: x.strftime("%Y%m%d") renamed = ts.rename(renamer) assert renamed.index[0] == renamer(ts.index[0]) # dict rename_dict = dict(zip(ts.index, renamed.index)) renamed2 = ts.rename(rename_dict) tm.assert_series_equal(renamed, renamed2) # partial dict s = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64") renamed = s.rename({"b": "foo", "d": "bar"}) tm.assert_index_equal(renamed.index, Index(["a", "foo", "c", "bar"])) # index with name renamer = Series( np.arange(4), index=Index(["a", "b", "c", "d"], name="name"), dtype="int64" ) renamed = renamer.rename({}) assert renamed.index.name == renamer.index.name
def pipe_metrics(self, ds: pd.Series) -> pd.Series:
    """Derive the vaccination metrics from the raw dose counts.

    Renames the first/second dose entries, adds ``total_vaccinations`` via
    ``enrich_data``, folds the single-dose count ("DOSIS UNICA") into both
    people metrics and finally drops the single-dose entry.
    """
    renamed = ds.rename({
        "1ra. DOSIS": "people_vaccinated",
        "2da DOSIS": "people_fully_vaccinated",
    })

    total = (renamed.people_vaccinated
             + renamed.people_fully_vaccinated
             + renamed["DOSIS UNICA"])
    enriched = enrich_data(renamed, "total_vaccinations", total)

    # a single-dose vaccination counts both as vaccinated and as fully vaccinated
    for metric in ("people_vaccinated", "people_fully_vaccinated"):
        enriched[metric] = enriched[metric] + enriched["DOSIS UNICA"]

    return enriched.drop("DOSIS UNICA")
def plot_segmentation(ser: pd.Series, std_dev: float):
    """Plot the raw series against its adaptively sampled version.

    The figure title reports the compression rate, i.e. the share of points
    dropped by ``adaptative_sampling``.  All traces are drawn as lines; the
    last one (the segmented series) also shows markers.
    """
    keep_mask = adaptative_sampling(ser, std_dev=std_dev)
    kept_share = sum(keep_mask) / len(keep_mask)
    compression_rate = 1 - kept_share
    segmented = ser[keep_mask]

    traces = (ser.rename('raw'), segmented.rename('segmented'))
    fig = plot_multilple_series(
        *traces,
        kind='scatter',
        title=f'{ser.name} - compression rate: {compression_rate*100:.1f}%')

    for trace in fig.data:
        trace.update(mode='lines')
    fig.data[-1].update(mode='lines+markers')
    return fig
def _rename_folder(folder_current: pd.Series, beautified_folder: pd.Series): """ Renames existing folder. However, if relevant tags are missing or special cases occur (like mixed artists), then the folder is not touched. """ check_for_none = any(i is None for i in beautified_folder) if check_for_none is True: return # Do not rename any files if there is any None value in the beautified filepath list. else: folder_current = folder_current[0] # This is a pathlib object. beautified_folder = beautified_folder[0] beautified_folder = folder_current.parent / beautified_folder # Only rename the last item of the folder path. Files in subdirectories are that way not moved around. if pathlib.Path(beautified_folder).is_dir(): print( f"[Error] {beautified_folder} already exists. Folder was not renamed." ) else: folder_current.rename(beautified_folder)
def pipe_metrics(self, ds: pd.Series) -> pd.Series:
    """Derive the vaccination metrics from the raw dose counts.

    Cleans the three dose entries in place with ``clean_count``, renames the
    first/second dose entries, adds ``total_vaccinations`` via
    ``enrich_data``, folds the single-dose count ("UNIDOSIS") into both
    people metrics and finally drops the single-dose entry.
    """
    dose_labels = ["PRIMERA DOSIS", "SEGUNDA DOSIS", "UNIDOSIS"]
    ds.loc[dose_labels] = ds.loc[dose_labels].apply(clean_count)

    renamed = ds.rename(
        {
            "PRIMERA DOSIS": "people_vaccinated",
            "SEGUNDA DOSIS": "people_fully_vaccinated",
        }
    )

    total = renamed.people_vaccinated + renamed.people_fully_vaccinated + renamed["UNIDOSIS"]
    enriched = enrich_data(renamed, "total_vaccinations", total)

    # a single-dose vaccination counts both as vaccinated and as fully vaccinated
    for metric in ("people_vaccinated", "people_fully_vaccinated"):
        enriched[metric] = enriched[metric] + enriched["UNIDOSIS"]

    return enriched.drop("UNIDOSIS")
def test_rename(self):
    """Exercise ``Series.rename`` with a function, a full dict and a partial dict."""
    renamer = lambda x: x.strftime('%Y%m%d')
    renamed = self.ts.rename(renamer)
    self.assertEqual(renamed.index[0], renamer(self.ts.index[0]))

    # dict covering every label must give the same result as the function
    rename_dict = dict(zip(self.ts.index, renamed.index))
    renamed2 = self.ts.rename(rename_dict)
    assert_series_equal(renamed, renamed2)

    # partial dict: unmapped labels are kept
    s = Series(np.arange(4), index=['a', 'b', 'c', 'd'])
    renamed = s.rename({'b': 'foo', 'd': 'bar'})
    # assertTrue replaces the deprecated ``assert_`` alias, which no longer
    # exists on unittest.TestCase in Python 3.12+.
    self.assertTrue(np.array_equal(renamed.index, ['a', 'foo', 'c', 'bar']))
def sum_deltas_by_truthy_data(data_frame: pd.DataFrame,
                              truthy_frames: pd.Series) -> pd.DataFrame:
    """
    Sums the frame deltas over the frames flagged as truthy.

    :param data_frame: The game data frame, goal frames are removed. Its
        ``('game', 'delta')`` column supplies the per-frame delta.
    :param truthy_frames: Frames that have a truth value applied to them.
    :return: The summed delta of the truthy frames. NOTE: this is a single
        number, not a data frame; the return annotation is left as it was so
        the declared interface does not change.
    """
    truthy_frames = truthy_frames.rename('truthy')
    combined_data = pd.concat(
        [truthy_frames, data_frame['game', 'delta'].rename('delta')], axis=1)

    # `== True` rather than plain boolean indexing: rows where the flag is
    # missing after index alignment (NaN) are simply excluded.
    selected = combined_data.loc[combined_data['truthy'] == True]  # noqa: E712

    # The previous `.rename(columns={...})` on the summed Series was dropped:
    # Series.rename has no `columns` argument (TypeError on current pandas)
    # and the renamed label was never read -- `.delta` was returned anyway.
    return selected.sum().delta
def RSI_14(days):
    """Compute, print and plot an RSI series from the module-level price data.

    Reads the globals ``main_df`` and ``Stock_Choice``.  One ``RSI(...)``
    value is computed per row after the first ``days`` rows, working
    backwards from the end of the series; the first ``days`` positions are
    padded with 0.  The resulting frame is printed, plotted from row
    ``days`` onwards, and horizontal reference lines are drawn at 30 and 70.

    Args:
        days: Window offset; the plot title reports ``days + 1``.
    """
    RSI_list = []
    stock = main_df[Stock_Choice]
    leny = (len(stock))
    leny2 = (len(stock)) - days
    # The loop variable is unused: the loop only counts the rows after the
    # first `days`, sliding the (leny, leny2) pair back by one each time.
    for i in reversed(stock[days:]):
        RSI_list.append(RSI(stock, leny, leny2))
        leny -= 1
        leny2 -= 1
    # Since it goes backwards, the first `days` rows lack enough data
    # (the index would go negative), so they are padded with 0.
    RSI_list2 = [0] * days
    # Reverse the list so it lines up oldest-first with the dates.
    for i in reversed(RSI_list):
        RSI_list2.append(i)
    # Make AAPL a dataframe, not a series.
    # NOTE(review): 'AAPL' is hard-coded here while the RSI values above come
    # from main_df[Stock_Choice] -- confirm this is intended.
    stock = main_df[['AAPL']]
    # Move the index into a column so the frame can be concatenated positionally.
    stock.reset_index(inplace=True)
    # Make the list into a series named 'RSI'.
    RSI_list2 = Series(RSI_list2)
    RSI_list2.rename('RSI', inplace=True)
    # Merge the dates with the RSI values, then keep only the RSI column.
    Final_Result = pd.concat([stock, RSI_list2], axis=1)
    Final_Result = Final_Result.set_index('Date')
    Final_Result.drop('AAPL', axis=1, inplace=True)
    lenny3 = len(Final_Result) - 1
    print(Final_Result)
    ax3 = Final_Result[days:].plot(kind='line')
    # NOTE(review): Final_Result['RSI'][lenny3] uses an integer key on a
    # 'Date'-indexed series; presumably meant as "last row" -- confirm.
    ax3.set_title('{} {} day RSI is {}'.format(
        Stock_Choice, days + 1, round(Final_Result['RSI'][lenny3], 2)))
    plt.axhline(y=30, xmin=0, xmax=3, c="red", linewidth=1, zorder=1)
    plt.axhline(y=70, xmin=0, xmax=3, c="red", linewidth=1, zorder=1)
def test_to_frame_respects_name_none(self):
    """``to_frame(None)`` warns for now and still falls back to the old column label."""
    # GH#44212 if we explicitly pass name=None, then that should be respected,
    #  not changed to 0
    # GH-45448 this is first deprecated to only change in the future
    ser = Series(range(3))

    cases = [
        # unnamed series: column is still 0 (will become None in the future)
        (lambda: ser.to_frame(None), Index([0])),
        # named series: column keeps the series name
        (lambda: ser.rename("foo").to_frame(None), Index(["foo"], dtype=object)),
    ]
    for make_frame, expected_columns in cases:
        with tm.assert_produces_warning(FutureWarning):
            frame = make_frame()
        tm.assert_index_equal(frame.columns, expected_columns)
def save(cls, result: pd.Series) -> None:
    """Merge result into the database.

    The series is relabelled, turned into a one-row frame restricted to the
    stored columns, merged into ``cls.table`` and persisted.  Any failure is
    logged rather than raised.
    """
    label_map = {"name": "operator", "alias": "operator_alias", "fscore": "confidence"}
    stored_columns = ["operator", "operator_alias", "confidence", "method"]

    relabelled = result.rename(label_map)
    result = relabelled.to_frame().T[stored_columns]

    try:
        cls.table.merge_records(result)
        cls.table.persist()
        logger.debug(f"Updated {result.operator_alias} in database")
    except Exception as e:
        logger.error(f"Could not update database -- {e}")
def QA_fetch_factor_start_date(factor: pd.Series) -> pd.DataFrame:
    """
    Get the listing date of every stock in the factor pool.

    This reads from the local store, so the stock basic information must
    have been saved before calling it.
    """
    factor = QA_fmt_factor(factor.copy())
    merged_data = pd.DataFrame(factor.rename("factor"))

    # Normalize the stock codes
    unique_codes = factor.index.get_level_values("code").drop_duplicates()
    stock_list = QA_fmt_code_list(unique_codes)

    # Look up the listing dates
    df_local = QA_fetch_stock_basic(status=None).set_index("code")
    known_codes = df_local.index.intersection(stock_list)
    list_dates = df_local.loc[known_codes]["list_date"]

    # Attach the listing dates, keeping only stocks with a known date
    row_selector = (slice(None), list(list_dates.index))
    merged_data = merged_data.loc[row_selector, :]
    start_dates = merged_data.index.map(lambda key: list_dates.loc[key[1]])
    merged_data["start_date"] = start_dates.tolist()
    return merged_data
def get_timeserie_properties(series: pd.Series,
                             submonths: list = None,
                             scale_trend_intercept=False,
                             auto_corr_at: list = None) -> pd.Series:
    """
    Extract summary statistics from a timeseries and return them as a Series.

    The series does not need to be contiguous.

    :param series: Timeseries with a datetime index.
    :param submonths: Optional list of month numbers; when given, only
        observations falling in those months are taken into account.
    :param scale_trend_intercept: If True, the values are standardized with
        ``scale`` before the yearly linear trend is fitted.
    :param auto_corr_at: Lags in days at which the autocorrelation is
        computed. Defaults to ``[1, 5]``.
    :return: Series with the entries 'std', 'mean', 'length', 'n_nan',
        'trend', 'intercept' and one 'auto<lag>' entry per requested lag.
    """
    if auto_corr_at is None:
        auto_corr_at = [1, 5]

    if submonths is not None:
        series = series.loc[series.index.month.map(lambda m: m in submonths)]

    n_nan = series.isna().sum()
    series = series.dropna()  # Remove nans

    # These three statistics were referenced but never computed (NameError).
    # They are taken over the non-missing observations; the number of
    # missing ones is reported separately as 'n_nan'.
    std = series.std()
    mean = series.mean()
    length = len(series)

    lm = LinearRegression()
    years = series.index.year.values.reshape(-1, 1)
    if scale_trend_intercept:
        lm.fit(y=scale(series), X=years)
    else:
        lm.fit(y=series, X=years)
    # Single predictor, so there is exactly one coefficient; pick it out
    # explicitly instead of calling float() on a whole array.
    trend = float(lm.coef_.ravel()[0])  # (standardized) coefficient / yr
    intercept = lm.intercept_

    results = pd.Series({
        'std': std,
        'mean': mean,
        'length': length,
        'n_nan': n_nan,
        'trend': trend,
        'intercept': intercept
    })

    # Smoothness is the autocorrelation at each requested lag (in days).
    for lag in auto_corr_at:
        # Shifting the index back by `lag` days lines each value up with the
        # observation made `lag` days earlier.
        lagged = pd.Series(series.values,
                           index=series.index - pd.Timedelta(f'{lag}D'),
                           name=f'{lag}D')
        # Both inputs must be named: pd.merge needs column names for Series.
        paired = pd.merge(left=series.rename('unlagged'),
                          right=lagged,
                          left_index=True,
                          right_index=True,
                          how='inner')
        results.loc[f'auto{lag}'] = paired.corr().iloc[0, -1]

    return results
def plot_linear_and_segmentation(ser: pd.Series, std_dev: float):
    """Plot every stage of the linear preprocessing plus the segmented result.

    The figure title reports the compression rate, i.e. the share of
    smoothed points dropped by ``adaptative_sampling``.  All traces are
    drawn as lines; the last one (the segmented series) also shows markers.
    """
    interpolated, filtered, smoothed = signal_preprocess_linear(
        ser, output_type='all')

    keep_mask = adaptative_sampling(smoothed, std_dev=std_dev)
    kept_share = sum(keep_mask) / len(keep_mask)
    compression_rate = 1 - kept_share
    segmented = smoothed[keep_mask]

    traces = [
        ser.rename('raw'),
        signal_resample(ser, resample_method='').rename('resample'),
        interpolated.rename('interpolate'),
        filtered.rename('filter'),
        smoothed.rename('smooth'),
        segmented.rename('segmented'),
    ]
    fig = plot_multilple_series(
        *traces,
        kind='scatter',
        title=f'{ser.name} - compression rate: {compression_rate*100:.1f}%')

    for trace in fig.data:
        trace.update(mode='lines')
    fig.data[-1].update(mode='lines+markers')
    return fig
def test_rename_series_with_multiindex(self): # issue #43659 arrays = [ ["bar", "baz", "baz", "foo", "qux"], ["one", "one", "two", "two", "one"], ] index = MultiIndex.from_arrays(arrays, names=["first", "second"]) s = Series(np.ones(5), index=index) result = s.rename(index={"one": "yes"}, level="second", errors="raise") arrays_expected = [ ["bar", "baz", "baz", "foo", "qux"], ["yes", "yes", "two", "two", "yes"], ] index_expected = MultiIndex.from_arrays(arrays_expected, names=["first", "second"]) series_expected = Series(np.ones(5), index=index_expected) tm.assert_series_equal(result, series_expected)
def time_aware_data_split(X: pd.DataFrame,
                          y: pd.Series,
                          start_series: pd.Series,
                          test_size: float = 0.2):
    """Split the data into a train and a test set in a time-aware way.

    Rows are ordered by their start semester and then split without
    shuffling, so the test set consists of the newest students in the data
    set.

    :param X: the values of the dataset (left unmodified)
    :param y: the labels of the dataset; its name is used as the label
     column name, falling back to ``'Label'`` for an unnamed series
    :param start_series: the data storing the start semester. This must be a
     pd.Series with student id as index.
    :param test_size: the share of rows that goes into the test set
     (Default is 0.2)
    :return: X_train, X_test, y_train, y_test"""
    semester_column = 'Semester'
    # Use the label series' own name instead of assuming it is 'Label'.
    label_column = y.name if y.name is not None else 'Label'

    # Concatenate values with labels, so that they are strictly together.
    # Work on a copy so the caller's frame does not silently gain a column.
    data = X.copy()
    data[label_column] = y

    # Concatenate the values and labels with the semester it was taken.
    # `join='inner'` keeps only indices that are present in both objects.
    data = pd.concat(
        [data, start_series.rename(semester_column)], axis=1, join='inner')

    # Sorting by semester is what makes the unshuffled split time aware.
    data = data.sort_values(by=[semester_column])

    # Extract values and labels again from the combined frame.
    features = data.drop(columns=[semester_column, label_column])
    labels = data[label_column]

    # The combined frame is no longer needed.
    del data

    # Use sklearn's train_test_split without shuffling the data.
    return train_test_split(features, labels, test_size=test_size, shuffle=False)
def default_series_rename(series: pd.Series, name: str = 'tmp') -> pd.Series:
    """Return ``series`` renamed to ``name`` (``'tmp'`` when not given)."""
    renamed = series.rename(name)
    return renamed
def translate_index(input: pd.Series) -> pd.Series:
    """Return ``input`` with the source dose labels mapped to the standard metric names."""
    label_map = dict(
        first_vaccine_number='people_vaccinated',
        second_vaccine_number='people_fully_vaccinated',
    )
    return input.rename(label_map)
def make_pie_chart(index: int, values: pd.Series, title: str, axs):
    """
    Builds a pie chart at [index] of a 2x3 subplot grid with [values] and [title].

    Note that [values] is relabelled in place (see the ``rename`` call below).

    Args:
        - index(int): the 1-based location of the pie chart. Positions 1-3
                      map to the first row and 4-6 to the second row, each
                      counted from column 0 upwards
        - values(pd.Series): The pandas series containing both the labels of
                      the individual portions of the chart and the actual
                      values for each clade
        - title(str): the title of the chart
        - axs: The matplotlib Axes array to plot the pie chart on; indexed as
               ``axs[row, column]``

    Returns:
        A dict mapping every key of the colour table to a legend patch,
        hatched for the keys listed in ``to_hatch``.
    """

    def get_index(index: int) -> List[int]:
        # Convert the 1-based position into [row, column] for a 3-column grid.
        index -= 1
        if index >= 3:
            index -= 3
            return [1, index]
        return [0, index]

    # Maps the clade codes used in the data to the display names.
    labels = OrderedDict(
        [
            ("O", "O"),
            ("S", "S"),
            ("L", "L"),
            ("V", "V"),
            ("G", "G"),
            ("Gn", "G+S477X"),
            ("GH", "GH"),
            ("GHn", "GH+S477X"),
            ("GR", "GR"),
            ("GRn", "GR+S477X"),
            ("GV", "GV"),
            ("GVn", "GV+S477X"),
        ]
    )  # Must correspond to order in data
    # NOTE(review): the colours are handed to ``pie`` in this dict's order,
    # which differs from the order of ``labels`` above -- confirm that it
    # matches the order of ``values``.
    colors = {
        "G": "#ffcccc",
        "G+S477X": "#ffcccc",
        "GH": "#f4b183",
        "GH+S477X": "#f4b183",
        "GR": "#ff7c80",
        "GR+S477X": "#ff7c80",
        "GV": "#f08bb5",
        "GV+S477X": "#f08bb5",
        "L": "#d9d9d9",
        "O": "#808080",
        "S": "#70ad47",
        "V": "#ff99ff",
    }
    # Display names whose wedges and legend patches get a "+++" hatch.
    to_hatch = ["G+S477X", "GH+S477X", "GR+S477X", "GV+S477X"]
    indices = get_index(index)
    # print(title, values)
    # Relabels the caller's series in place.
    values.rename(index=labels, inplace=True)
    # print("renamed", values)
    # Normalization is switched off when all values sum to zero.
    normalize = values.values.sum(axis=0) != 0
    axs[indices[0], indices[1]].set_title(label=title, fontdict={"fontsize": 10})
    pie = axs[indices[0], indices[1]].pie(
        values.values, normalize=normalize, labeldistance=None, colors=colors.values()
    )
    # Wedges are paired with clades by size: the clades with a positive value
    # are sorted by value, the wedges by angular span, both descending.
    plotted_val = values[values > 0]
    plotted_clades = sorted(
        plotted_val.index, key=lambda x: plotted_val[x], reverse=True
    )
    wedges = list(map(lambda x: (x, x.theta2 - x.theta1), pie[0]))
    wedges = sorted(wedges, key=lambda x: x[1], reverse=True)
    for wedge, clade in zip(wedges, plotted_clades):
        # print(wedge, clade)
        # print(f"{clade} in {to_hatch} = {clade in to_hatch}")
        if clade in to_hatch:
            wedge[0].set_hatch("+++")
    # ``wedges`` is reused from here on for the legend patches that are returned.
    wedges = {}
    for key, value in colors.items():
        patch = mpatches.Patch(
            facecolor=value, hatch="+++" if key in to_hatch else "", label=key
        )
        wedges[key] = patch
    return wedges
def test_rename_partial_dict(self): # partial dict ser = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64") renamed = ser.rename({"b": "foo", "d": "bar"}) tm.assert_index_equal(renamed.index, Index(["a", "foo", "c", "bar"]))
def variants_vaccines(rate_age_pattern: pd.Series,
                      denom_age_pattern: pd.Series,
                      age_spec_population: pd.Series,
                      rate: pd.Series,
                      day_shift: int,
                      escape_variant_prevalence: pd.Series,
                      severity_variant_prevalence: pd.Series,
                      vaccine_coverage: pd.DataFrame,
                      population: pd.Series,
                      variant_risk_ratio: float,
                      verbose: bool = True,):
    """Adjust ``rate`` for variant prevalence and vaccine coverage.

    The prevalence series and the coverage frame are shifted forward by
    ``day_shift`` days and aligned to the index of ``rate``.  A numerator and
    denominator are then built for three strains (ancestral, escape variant,
    severity variant), each is passed through
    ``adjust_by_variant_classification``, and the per-strain results are
    summed.

    The inputs are expected to be indexed by ``location_id`` and ``date``;
    the prevalence series must be named ``escape_variant_prevalence`` and
    ``severity_variant_prevalence`` respectively, since they are selected by
    those column names after ``reset_index``.

    NOTE(review): the ``lr`` / ``hr`` prefixes presumably stand for the
    low-risk and high-risk groups -- confirm against
    ``adjust_by_variant_classification``.

    Returns:
        Tuple ``(rate, rate_lr, rate_hr, pct_inf_lr, pct_inf_hr)``: the
        combined adjusted rate, the lr and hr rates, and the lr and hr
        shares of the combined denominator.
    """
    # Shift the escape-variant prevalence dates by `day_shift` days.
    escape_variant_prevalence = escape_variant_prevalence.reset_index()
    escape_variant_prevalence['date'] += pd.Timedelta(days=day_shift)
    escape_variant_prevalence = (escape_variant_prevalence
                                 .set_index(['location_id', 'date'])
                                 .loc[:, 'escape_variant_prevalence'])
    # Concatenating with `rate` aligns on the union of both indexes; rows
    # without a prevalence value are then filled with 0.
    escape_variant_prevalence = pd.concat([rate, escape_variant_prevalence], axis=1)  # borrow axis
    escape_variant_prevalence = escape_variant_prevalence['escape_variant_prevalence'].fillna(0)

    # Same shift-and-align treatment for the severity-variant prevalence.
    severity_variant_prevalence = severity_variant_prevalence.reset_index()
    severity_variant_prevalence['date'] += pd.Timedelta(days=day_shift)
    severity_variant_prevalence = (severity_variant_prevalence
                                   .set_index(['location_id', 'date'])
                                   .loc[:, 'severity_variant_prevalence'])
    severity_variant_prevalence = pd.concat([rate, severity_variant_prevalence], axis=1)  # borrow axis
    severity_variant_prevalence = severity_variant_prevalence['severity_variant_prevalence'].fillna(0)

    # Coverage columns that are kept, for the wildtype and variant suffixes.
    lr_e = [f'cumulative_lr_effective_{variant_suffix}' for variant_suffix in ['wildtype', 'variant']]
    lr_ep = [f'cumulative_lr_effective_protected_{variant_suffix}' for variant_suffix in ['wildtype', 'variant']]
    hr_e = [f'cumulative_hr_effective_{variant_suffix}' for variant_suffix in ['wildtype', 'variant']]
    hr_ep = [f'cumulative_hr_effective_protected_{variant_suffix}' for variant_suffix in ['wildtype', 'variant']]
    vaccine_coverage = (vaccine_coverage
                        .loc[:, lr_e + lr_ep + hr_e + hr_ep]
                        .reset_index())
    vaccine_coverage['date'] += pd.Timedelta(days=day_shift)
    vaccine_coverage = vaccine_coverage.set_index(['location_id', 'date'])
    # `rate` is only added to align the index; its column is dropped again.
    vaccine_coverage = pd.concat([rate.rename('rate'), vaccine_coverage], axis=1)  # borrow axis
    del vaccine_coverage['rate']
    vaccine_coverage = vaccine_coverage.fillna(0)  # not super necessary...

    # Starting numerator: 100 divided by the population, on the index of `rate`.
    numerator = pd.Series(100, index=rate.index)
    numerator /= population

    # Denominators implied by the rate; the variant strains use the rate
    # scaled by `variant_risk_ratio`.
    denominator_a = (numerator / rate)
    denominator_ev = (numerator / (rate * variant_risk_ratio))
    denominator_sv = denominator_ev.copy()

    # Split each denominator by strain prevalence; the ancestral strain gets
    # the share not taken by either variant.
    denominator_a *= (1 - (escape_variant_prevalence + severity_variant_prevalence)[denominator_a.index])
    denominator_ev *= escape_variant_prevalence[denominator_ev.index]
    denominator_sv *= severity_variant_prevalence[denominator_sv.index]

    # Rebuild the per-strain numerators from the prevalence-weighted denominators.
    numerator_a = (rate * denominator_a)
    numerator_ev = (rate * variant_risk_ratio * denominator_ev)
    numerator_sv = (rate * variant_risk_ratio * denominator_sv)

    if verbose:
        logger.info('Adjusting ancestral...')
    numerator_lr_a, numerator_hr_a, denominator_lr_a, denominator_hr_a = adjust_by_variant_classification(
        numerator=numerator_a,
        denominator=denominator_a,
        variant_suffixes=['wildtype', 'variant',],
        rate_age_pattern=rate_age_pattern,
        denom_age_pattern=denom_age_pattern,
        age_spec_population=age_spec_population,
        vaccine_coverage=vaccine_coverage,
        population=population,
    )
    if verbose:
        logger.info('Adjusting non-escape...')
    numerator_lr_sv, numerator_hr_sv, denominator_lr_sv, denominator_hr_sv = adjust_by_variant_classification(
        numerator=numerator_sv,
        denominator=denominator_sv,
        variant_suffixes=['wildtype', 'variant'],
        rate_age_pattern=rate_age_pattern,
        denom_age_pattern=denom_age_pattern,
        age_spec_population=age_spec_population,
        vaccine_coverage=vaccine_coverage,
        population=population,
    )
    if verbose:
        logger.info('Adjusting escape...')
    # The escape strain is adjusted with the 'variant' coverage suffix only.
    numerator_lr_ev, numerator_hr_ev, denominator_lr_ev, denominator_hr_ev = adjust_by_variant_classification(
        numerator=numerator_ev,
        denominator=denominator_ev,
        variant_suffixes=['variant',],
        rate_age_pattern=rate_age_pattern,
        denom_age_pattern=denom_age_pattern,
        age_spec_population=age_spec_population,
        vaccine_coverage=vaccine_coverage,
        population=population,
    )

    # Recombine the three strains per group.
    numerator_lr = numerator_lr_a + numerator_lr_ev + numerator_lr_sv
    denominator_lr = denominator_lr_a + denominator_lr_ev + denominator_lr_sv
    numerator_hr = numerator_hr_a + numerator_hr_ev + numerator_hr_sv
    denominator_hr = denominator_hr_a + denominator_hr_ev + denominator_hr_sv

    rate = (numerator_lr + numerator_hr) / (denominator_lr + denominator_hr)
    rate_lr = numerator_lr / denominator_lr
    rate_hr = numerator_hr / denominator_hr

    pct_inf_lr = denominator_lr / (denominator_lr + denominator_hr)
    pct_inf_hr = denominator_hr / (denominator_lr + denominator_hr)

    return rate, rate_lr, rate_hr, pct_inf_lr, pct_inf_hr
# Display whatever figure is currently pending.
plt.show()

# Fit a random forest at depth `best` and record its mean squared errors.
name = f"RandomForest (depth={best})"
model = RandomForestRegressor(max_depth=best, random_state=0).fit(X_train, Y_train)
test[name] = mean_squared_error(Y_test, model.predict(X_test))
train[name] = mean_squared_error(Y_train, model.predict(X_train))
final_models[name] = model
# One-row RMSE summary for this model. The frame is not assigned, so it is
# only visible when this runs interactively (presumably a notebook cell).
DataFrame({'name': name, 'train': np.sqrt(train[name]), 'test': np.sqrt(test[name])}, index=['RMSE'])

# show top feature Importances
top_n = 20
# Sorted ascending, so the most important features are at the end.
imp = Series(model.feature_importances_, index=X.columns).sort_values()
# Rank table (1 = most important) of the `top_n` features; like the summary
# above, the result is not assigned.
DataFrame.from_dict({i+1: {'importance': imp[s], 'series_id': s, 'description': alf.header(s)} for i, s in enumerate(np.flip(imp.index[-top_n:]))}, orient='index')

# Plot summary of model RMSE's
fig, ax = plt.subplots(num=1, clear=True, figsize=(10,6))
# `train` and `test` hold squared errors keyed by model name; take the square
# root of both columns and order the bars by test RMSE.
np.sqrt(train.rename('train').to_frame().join(test.rename('test')))\
    .sort_values('test').plot.barh(ax=ax, width=0.85)
ax.yaxis.set_tick_params(labelsize=10)
ax.set_title('Regression RMSE')
# Extra left margin for the model-name tick labels.
ax.figure.subplots_adjust(left=0.35)
plt.savefig(os.path.join(imgdir, 'rmse.jpg'))
plt.show()
def test_rename_method_and_index(self):
    """Passing both a positional mapper and ``index=`` is rejected."""
    # GH 40977
    ser = Series([1, 2])
    expected_message = "Cannot specify both 'mapper' and 'index'"
    with pytest.raises(TypeError, match=expected_message):
        ser.rename(str, index=str)
def translate_index(ds: pd.Series) -> pd.Series:
    """Return ``ds`` with the source-language labels mapped to the standard metric names."""
    label_map = {
        "Общ брой лица със завършен ваксинационен цикъл": "people_fully_vaccinated",
        "Общо поставени дози": "total_vaccinations",
    }
    translated = ds.rename(label_map)
    return translated
def test_rename_none(self): # GH 40977 ser = Series([1, 2], name="foo") result = ser.rename(None) expected = Series([1, 2]) tm.assert_series_equal(result, expected)
def test_rename_mi(self): s = Series([11,21,31], index=MultiIndex.from_tuples([("A",x) for x in ["a","B","c"]])) result = s.rename(str.lower)
def test_rename_by_series(self): ser = Series(range(5), name="foo") renamer = Series({1: 10, 2: 20}) result = ser.rename(renamer) expected = Series(range(5), index=[0, 10, 20, 3, 4], name="foo") tm.assert_series_equal(result, expected)
def translate_index(ds: pd.Series) -> pd.Series:
    """Return ``ds`` with the source-language labels mapped to the standard metric names."""
    label_map = {
        'Общо ваксинирани лицас втора доза': 'people_fully_vaccinated',
        'Общо поставени дози': 'total_vaccinations',
    }
    translated = ds.rename(label_map)
    return translated
def test_rename_mi(self): s = Series( [11, 21, 31], index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]]), ) s.rename(str.lower)
def test_rename_by_series(self): s = Series(range(5), name='foo') renamer = Series({1: 10, 2: 20}) result = s.rename(renamer) expected = Series(range(5), index=[0, 10, 20, 3, 4], name='foo') tm.assert_series_equal(result, expected)