def test_first_last_valid(self): N = len(self.frame.index) mat = randn(N) mat[:5] = nan mat[-5:] = nan frame = DataFrame({'foo': mat}, index=self.frame.index) index = frame.first_valid_index() assert index == frame.index[5] index = frame.last_valid_index() assert index == frame.index[-6] # GH12800 empty = DataFrame() assert empty.last_valid_index() is None assert empty.first_valid_index() is None # GH17400: no valid entries frame[:] = nan assert frame.last_valid_index() is None assert frame.first_valid_index() is None # GH20499: its preserves freq with holes frame.index = date_range("20110101", periods=N, freq="B") frame.iloc[1] = 1 frame.iloc[-2] = 1 assert frame.first_valid_index() == frame.index[1] assert frame.last_valid_index() == frame.index[-2] assert frame.first_valid_index().freq == frame.index.freq assert frame.last_valid_index().freq == frame.index.freq
def time_filter_data(dataframe: pd.DataFrame, timestamp_start: int = None,
                     timestamp_end: int = None) -> pd.DataFrame:
    """Reduce a dataframe to the rows whose index lies in [timestamp_start, timestamp_end].

    The provided timestamps need not be present in the data; the comparison
    slices as accurately as possible. If start is not provided, it defaults to
    the first valid index of the frame; if end is not provided, it defaults to
    the last valid index.

    Note: the index is sorted first to enable slicing.

    Args:
        dataframe (pd.DataFrame): Data frame to be sliced.
        timestamp_start (int): index of first data point (inclusive, unix timestamp).
        timestamp_end (int): index of last data point (inclusive, unix timestamp).

    Returns:
        dataframe (pd.DataFrame): sliced pd DataFrame.
    """
    dataframe = dataframe.sort_index()
    if timestamp_start is None:
        print("start index was not provided")
        timestamp_start = dataframe.first_valid_index()
    if timestamp_end is None:
        print("end index is not provided")
        timestamp_end = dataframe.last_valid_index()
    # Bug fix: the docstring promises inclusive bounds, but the original used
    # strict comparisons (> and <), silently dropping rows exactly at the
    # start/end timestamps (and returning an empty frame when both defaults
    # were used on a 1-row frame).
    mask = (dataframe.index >= timestamp_start) & (dataframe.index <= timestamp_end)
    return dataframe[mask]
def _augment_account(cls, account: pd.DataFrame) -> pd.DataFrame:
    """Fill gaps in an account's "value" history using fund price movements.

    Reindexes the account onto the fund price calendar, then walks backwards
    and forwards from the last valid value, estimating missing values from the
    mean day-over-day price ratio of the funds held on that day.

    Args:
        account: frame with a "value" column and an "isins" column holding
            per-row collections of ISIN codes (may be empty/None).

    Returns:
        pd.DataFrame: the augmented ("value", "isins") frame on the price index,
        truncated to the account's first/last valid index.
    """
    # Flatten the per-row ISIN collections into the set of all funds ever held.
    account_isins = set(isin for isins in account["isins"].tolist() if isins for isin in isins)
    prices_df = fund_cache.get_prices(account_isins)
    # Day-over-day price ratio (1.0 == unchanged), aligned with prices_df rows.
    prices_ratios_df = prices_df.pct_change() + 1
    # "value" keeps NaN where the account has no snapshot; "isins" is
    # backfilled so every price date knows which funds were (next) held.
    values_series = account.reindex(index=prices_df.index)["value"]
    isins_series = account.reindex(index=prices_df.index, method="bfill")["isins"]
    augmented = pd.concat([values_series, isins_series], axis=1) \
        .truncate(before=account.first_valid_index(), after=account.last_valid_index())
    last_valid_index_loc = augmented.index.get_loc(values_series.last_valid_index())
    # Bfill from last valid entry: walk backwards, estimating each missing
    # previous value by undoing one day's mean price movement.
    for i in range(last_valid_index_loc, 0, -1):
        dt = augmented.index[i]
        curr_value, prev_value = augmented.iloc[i]["value"], augmented.iloc[i - 1]["value"]
        isins = augmented.iloc[i]["isins"]
        if np.isnan(prev_value):
            if not isins:
                # No holdings to price against: carry the value back unchanged.
                augmented.at[augmented.index[i - 1], "value"] = curr_value
            else:
                augmented.at[augmented.index[i - 1], "value"] = \
                    curr_value / prices_ratios_df.loc[dt, isins].mean()
    # Ffill from last valid entry to today: apply each day's mean price
    # movement forward onto missing next-day values.
    for i in range(last_valid_index_loc, len(augmented.index) - 1):
        dt = augmented.index[i]
        curr_value, next_value = augmented.iloc[i]["value"], augmented.iloc[i + 1]["value"]
        next_isins = augmented.iloc[i + 1]["isins"]
        if np.isnan(next_value):
            if not next_isins:
                augmented.at[augmented.index[i + 1], "value"] = curr_value
            else:
                # NOTE(review): uses the ratio at dt (current day), not the next
                # day, to project forward — confirm this is intentional.
                augmented.at[augmented.index[i + 1], "value"] = \
                    curr_value * prices_ratios_df.loc[dt, next_isins].mean()
    # NOTE(review): original comment said "fill initial NaNs with 100", but this
    # backfills from the first valid value — confirm which is intended.
    augmented["value"] = augmented["value"].bfill()
    return augmented
def slice_by_index(dataframe: pd.DataFrame, timestamp_start: int = None,
                   timestamp_end: int = None) -> pd.DataFrame:
    """Cut out the rows between the given timestamps, keeping both sides.

    Rows whose index lies inside [timestamp_start, timestamp_end] are removed;
    everything strictly before the start or strictly after the end is returned.
    If start is not provided, it defaults to the frame's first valid index; if
    end is not provided, it defaults to the frame's last valid index.

    Args:
        dataframe (pd.DataFrame): Data frame to be sliced.
        timestamp_start (int): index of first removed data point (inclusive, unix timestamp).
        timestamp_end (int): index of last removed data point (inclusive, unix timestamp).

    Returns:
        dataframe (pd.DataFrame): sliced pd DataFrame.
    """
    start = dataframe.first_valid_index() if timestamp_start is None else timestamp_start
    end = dataframe.last_valid_index() if timestamp_end is None else timestamp_end
    outside_range = (dataframe.index < start) | (dataframe.index > end)
    return dataframe[outside_range]
def test_first_last_valid_preserves_freq(self): # GH#20499: its preserves freq with holes index = date_range("20110101", periods=30, freq="B") frame = DataFrame(np.nan, columns=["foo"], index=index) frame.iloc[1] = 1 frame.iloc[-2] = 1 assert frame.first_valid_index() == frame.index[1] assert frame.last_valid_index() == frame.index[-2] assert frame.first_valid_index().freq == frame.index.freq assert frame.last_valid_index().freq == frame.index.freq ts = frame["foo"] assert ts.first_valid_index() == ts.index[1] assert ts.last_valid_index() == ts.index[-2] assert ts.first_valid_index().freq == ts.index.freq assert ts.last_valid_index().freq == ts.index.freq
def test_first_last_valid(self): N = len(self.frame.index) mat = randn(N) mat[:5] = nan mat[-5:] = nan frame = DataFrame({'foo': mat}, index=self.frame.index) index = frame.first_valid_index() assert index == frame.index[5] index = frame.last_valid_index() assert index == frame.index[-6] # GH12800 empty = DataFrame() assert empty.last_valid_index() is None assert empty.first_valid_index() is None
def test_first_last_valid(self): N = len(self.frame.index) mat = randn(N) mat[:5] = nan mat[-5:] = nan frame = DataFrame({'foo': mat}, index=self.frame.index) index = frame.first_valid_index() self.assertEqual(index, frame.index[5]) index = frame.last_valid_index() self.assertEqual(index, frame.index[-6]) # GH12800 empty = DataFrame() self.assertIsNone(empty.last_valid_index()) self.assertIsNone(empty.first_valid_index())
def get_kickoffs_from_game(
        game: Game, proto_game: game_pb2, id_creation: Callable,
        player_map: Dict[str, Player], data_frame: pd.DataFrame,
        kickoff_frames: pd.DataFrame,
        first_touch_frames: pd.DataFrame) -> Dict[int, KickoffStats]:
    """Build per-kickoff stats protos for a game, keyed by kickoff start frame.

    For each kickoff frame, fills a new KickoffStats entry on
    proto_game.game_stats.kickoff_stats with timing, per-player stats, the
    first-touch player, and the kickoff type.

    Args:
        game: parsed game object (unused here beyond the signature — TODO confirm).
        proto_game: protobuf game; mutated in place via kickoff_stats.add().
        id_creation: callable for id creation (unused in this body — TODO confirm).
        player_map: player name/id -> Player objects.
        data_frame: full game telemetry with ('game', 'time') / ('game', 'delta')
            columns and one top-level column group per player name.
        kickoff_frames: frame indices at which each kickoff starts.
        first_touch_frames: frame indices of the first ball touch per kickoff.

    Returns:
        Dict[int, KickoffStats]: kickoff start frame -> populated stats proto.
    """
    kickoffs = dict()
    goals = proto_game.game_metadata.goals
    num_goals = len(goals)
    last_frame = data_frame.last_valid_index()
    first_frame = data_frame.first_valid_index()
    for index, frame in enumerate(kickoff_frames):
        starting_kickoff_time = data_frame.game.time[frame]
        cur_kickoff = proto_game.game_stats.kickoff_stats.add()
        end_frame = first_touch_frames[index]
        # Window from just before the kickoff to shortly after the first touch,
        # clamped to the frames actually present in the data.
        smaller_data_frame = data_frame.loc[
            max(first_frame, frame - 1):min(end_frame + 20, last_frame)]
        cur_kickoff.start_frame = frame
        cur_kickoff.touch_frame = end_frame
        ending_time = smaller_data_frame['game']['time'][end_frame]
        time = cur_kickoff.touch_time = ending_time - starting_kickoff_time
        # Two alternative timings: wall-clock diff vs summed per-frame deltas.
        differs = smaller_data_frame['game']['time'][frame:end_frame].diff(
        )
        summed_time_diff = differs.sum()
        summed_time = smaller_data_frame['game']['delta'][
            frame:end_frame].sum()
        if summed_time > 0:
            # Prefer the delta-based timing when it is available.
            cur_kickoff.touch_time = summed_time
        logger.error("STRAIGHT TIME " + str(time))
        logger.error("SUM TIME" + str(summed_time))
        sum_vs_adding_diff = time - summed_time  # NOTE(review): computed but unused
        # find who touched the ball first
        closest_player_distance = 10000000  # sentinel: "no player found yet"
        closest_player_id = 0
        if index < num_goals:
            BaseKickoff.get_goal_data(cur_kickoff, goals[index], data_frame)
        # get player stats
        for player in player_map.values():
            if player.name not in data_frame:
                continue
            kickoff_player = BaseKickoff.get_player_stats(
                cur_kickoff, player, smaller_data_frame, frame, end_frame)
            if kickoff_player.ball_dist < closest_player_distance:
                closest_player_distance = kickoff_player.ball_dist
                closest_player_id = player.id.id
        if closest_player_distance != 10000000:
            # Todo use hit analysis
            cur_kickoff.touch.first_touch_player.id = closest_player_id
        cur_kickoff.type = BaseKickoff.get_kickoff_type(
            cur_kickoff.touch.players)
        kickoffs[frame] = cur_kickoff
    return kickoffs
def _to_returns(cls, augmented: pd.DataFrame) -> Dict[str, Optional[float]]:
    """Compute the fractional return of "value" over each configured lookback.

    Args:
        augmented: frame with a "value" column on a date-like index.

    Returns:
        Dict[str, Optional[float]]: lookback label -> (end - start) / start,
        or None when no rows fall inside that lookback window.
    """
    returns = dict()
    # Configured lookback labels (assumes pd_offset_from_lookback can parse
    # each one into a pandas offset — TODO confirm format).
    lookbacks = properties.get("fund.lookbacks")
    dt = augmented.last_valid_index()
    for lookback in lookbacks:
        # Label-based slice: rows from (dt - lookback offset) through dt.
        window = augmented[dt - pd_offset_from_lookback(lookback): dt]["value"]
        # Fractional return over the window; None when the window is empty.
        returns[lookback] = (window.iat[-1] - window.iat[0]) / window.iat[0] \
            if len(window.index) \
            else None
    return returns
def test_first_last_valid_all_nan(self, index_func): # GH#17400: no valid entries index = index_func(30) frame = DataFrame(np.nan, columns=["foo"], index=index) assert frame.last_valid_index() is None assert frame.first_valid_index() is None ser = frame["foo"] assert ser.first_valid_index() is None assert ser.last_valid_index() is None
def test_first_last_valid(self, index_func): N = 30 index = index_func(N) mat = np.random.randn(N) mat[:5] = np.nan mat[-5:] = np.nan frame = DataFrame({"foo": mat}, index=index) assert frame.first_valid_index() == frame.index[5] assert frame.last_valid_index() == frame.index[-6] ser = frame["foo"] assert ser.first_valid_index() == frame.index[5] assert ser.last_valid_index() == frame.index[-6]
def get_dataframe_daterange(
        self, dataframe: DataFrame) -> Tuple[Timestamp, Timestamp]:
    """Returns the daterange for the passed DataFrame

    Args:
        dataframe: DataFrame to parse; must have a DatetimeIndex.
    Returns:
        tuple (Timestamp, Timestamp): Start and end Timestamps for data
    Raises:
        TypeError: if the DataFrame's index is not a DatetimeIndex.
    """
    from pandas import DatetimeIndex
    from openghg.util import timestamp_tzaware

    if not isinstance(dataframe.index, DatetimeIndex):
        # Fixed message: the original read "...must be passed", which said the
        # opposite of what the check enforces.
        raise TypeError(
            "Only DataFrames with a DatetimeIndex can be passed")

    # Here we want to make the pandas Timestamps timezone aware
    start = timestamp_tzaware(dataframe.first_valid_index())
    end = timestamp_tzaware(dataframe.last_valid_index())

    return start, end
def ohlcv_resample(ohlcv: pd.DataFrame, **kwargs):
    """Resample OHLCV bars into `period`-long bars at every phase offset.

    For each offset 0..period-1, resamples the (sorted) input starting from
    that row into '{period}{interval}' bars (open=first, high=max, low=min,
    close=last, volume=sum), applies `process_fun`, and concatenates all
    phases into one sorted frame.

    Keyword args:
        period (int, default 7): bar length in `interval` units.
        interval (str, default 'D'): pandas offset alias for one unit.
        process_fun (callable): applied to each phase's resampled frame.
        rename_fun (callable): optional, maps the column list to new names.
        trim (bool, default True): restrict output to the input's valid range.
    """
    period = int(kwargs.get('period', 7))
    interval = kwargs.get('interval', 'D')
    process_fun = kwargs.get('process_fun', lambda x: x)
    rename_fun = kwargs.get('rename_fun', None)

    sorted_df = ohlcv.sort_index()
    freq = '{}{}'.format(period, interval)
    agg_spec = {'open': 'first',
                'high': 'max',
                'low': 'min',
                'close': 'last',
                'volume': 'sum'}

    chunks = []
    for offset in range(period):
        shifted = sorted_df.iloc[offset:]
        bars = shifted.resample(freq,
                                closed='left',
                                label='right',
                                convention='end',
                                kind='timestamp').agg(agg_spec).copy()
        chunks.append(process_fun(bars))

    combined = pd.concat(chunks, sort=True).sort_index()
    if rename_fun:
        combined.columns = rename_fun([c for c in combined.columns])
    if kwargs.get('trim', True):
        combined = combined.loc[ohlcv.first_valid_index():ohlcv.last_valid_index()]
    return combined
def force_full_index(dataframe: pd.DataFrame, resampling_step: int = None,
                     resampling_unit: str = "min", timestamp_start: int = None,
                     timestamp_end: int = None) -> pd.DataFrame:
    """Force a full, regular index; missing entries are filled with NaN.

    Note: resampling should be done before to benefit from sampling strategies.

    Args:
        dataframe (pd.DataFrame): data frame indexed by unix timestamps (seconds).
        resampling_step (int): desired time step of the final dataframe (required).
        resampling_unit (str, 'min'): unit of the desired time step.
        timestamp_start: index at which the dataframe starts (defaults to the
            first valid index).
        timestamp_end: index at which the dataframe ends (defaults to the last
            valid index).

    Returns:
        pd.DataFrame: dataframe reindexed onto the full regular index.

    Raises:
        ValueError: if resampling_step is not provided.
    """
    # Bug fix: with the old default, str(None) + unit produced an invalid
    # frequency like "Nonemin" and a confusing error deep inside date_range.
    # Fail fast with a clear message instead (still a ValueError, so callers
    # that caught the old failure keep working).
    if resampling_step is None:
        raise ValueError("resampling_step must be provided, e.g. force_full_index(df, resampling_step=1)")
    if timestamp_start is None:
        print("start index was not provided")
        timestamp_start = dataframe.first_valid_index()
    if timestamp_end is None:
        print("end index is not provided")
        timestamp_end = dataframe.last_valid_index()

    freq = str(resampling_step) + resampling_unit
    new_index = pd.date_range(start=timestamp_start, end=timestamp_end, freq=freq)
    # Convert the DatetimeIndex (nanoseconds) back to unix seconds to match
    # the dataframe's integer index.
    new_index = new_index.astype(numpy.int64) // 10**9
    return dataframe.reindex(index=new_index, fill_value=numpy.nan)
def test_first_last_valid( self, float_frame, data, idx, expected_first, expected_last ): N = len(float_frame.index) mat = np.random.randn(N) mat[:5] = np.nan mat[-5:] = np.nan frame = DataFrame({"foo": mat}, index=float_frame.index) index = frame.first_valid_index() assert index == frame.index[5] index = frame.last_valid_index() assert index == frame.index[-6] # GH12800 empty = DataFrame() assert empty.last_valid_index() is None assert empty.first_valid_index() is None # GH17400: no valid entries frame[:] = np.nan assert frame.last_valid_index() is None assert frame.first_valid_index() is None # GH20499: its preserves freq with holes frame.index = date_range("20110101", periods=N, freq="B") frame.iloc[1] = 1 frame.iloc[-2] = 1 assert frame.first_valid_index() == frame.index[1] assert frame.last_valid_index() == frame.index[-2] assert frame.first_valid_index().freq == frame.index.freq assert frame.last_valid_index().freq == frame.index.freq # GH 21441 df = DataFrame(data, index=idx) assert expected_first == df.first_valid_index() assert expected_last == df.last_valid_index()
def test_first_last_valid(self, data, idx, expected_first, expected_last): N = len(self.frame.index) mat = randn(N) mat[:5] = np.nan mat[-5:] = np.nan frame = DataFrame({'foo': mat}, index=self.frame.index) index = frame.first_valid_index() assert index == frame.index[5] index = frame.last_valid_index() assert index == frame.index[-6] # GH12800 empty = DataFrame() assert empty.last_valid_index() is None assert empty.first_valid_index() is None # GH17400: no valid entries frame[:] = np.nan assert frame.last_valid_index() is None assert frame.first_valid_index() is None # GH20499: its preserves freq with holes frame.index = date_range("20110101", periods=N, freq="B") frame.iloc[1] = 1 frame.iloc[-2] = 1 assert frame.first_valid_index() == frame.index[1] assert frame.last_valid_index() == frame.index[-2] assert frame.first_valid_index().freq == frame.index.freq assert frame.last_valid_index().freq == frame.index.freq # GH 21441 df = DataFrame(data, index=idx) assert expected_first == df.first_valid_index() assert expected_last == df.last_valid_index()
class Player2:
    """Accumulates a player's per-date batting and pitching stat lines.

    Each stats DataFrame holds one row per game date; when a second stat line
    arrives for the same date (e.g. a doubleheader), per-game counting stats
    are summed into the existing row, season totals are overwritten, and
    rate-like values (game_score, fldg) are averaged.
    """

    def __init__(self, first_name, last_name, id):
        self.first_name = first_name
        self.last_name = last_name
        self.id = id
        # Column order must match the positional lists built in
        # add_batting_stats / add_pitching_stats below.
        self.batting_stats_df = DataFrame(columns=[
            "at_bats", "avg", "hits", "runs", "rbi", "hr", "slg", "obp", "ops", "fldg", "batting_order", "bb", "sb", "cs", "e", "hbp", "so", "sac", "sf", "lob", "fly_outs", "put_outs", "assists", "ground_outs", "season_hits", "season_runs", "season_hr", "season_rbi", "season_so", "season_bb", "d", "t"
        ])
        self.pitching_stats_df = DataFrame(columns=[
            "hits", "runs", "hr", "bb", "so", "season_hits", "season_runs", "season_hr", "season_so", "season_bb", "loses", "wins", "saves", "er", "hold", "blown_saves", "outs_recorded", "batters_faced", "game_score", "era", "num_pitches", "win", "loss", "save", "season_er", "season_ip", "s"
        ])

    def __str__(self):
        return str.format('{0} : {1}', self.id, self.last_name)

    def __repr__(self):
        return self.__str__()

    def add_stats(self, stats, date):
        # Presence of 'ab' (at-bats) marks a batting line — TODO confirm the
        # upstream stats object never carries 'ab' on a pitching line.
        if (hasattr(stats, 'ab')):
            self.add_batting_stats(stats, date)
        else:
            self.add_pitching_stats(stats, date)

    def add_pitching_stats(self, stats, date):
        """Record (or merge) one pitching line for `date`."""
        # Optional fields default to 0 / '' when absent from the stats object.
        win = 1 if (hasattr(stats, 'win') and stats.win) else 0
        loss = 1 if (hasattr(stats, 'loss') and stats.loss) else 0
        save = 1 if (hasattr(stats, 'save') and stats.save) else 0
        seaason_hr = stats.s_hr if (hasattr(stats, 's_hr')) else 0  # NOTE(review): typo'd name, kept as-is
        note = stats.note if (hasattr(stats, 'note')) else ''  # NOTE(review): computed but unused
        if (not self.pitching_stats_df.empty and self.pitching_stats_df.last_valid_index() == date):
            # Second appearance on the same date: merge into the existing row.
            print('same day', date)
            self.pitching_stats_df.loc[date] = [
                stats.h + self.pitching_stats_df.loc[date, 'hits'],
                stats.r + self.pitching_stats_df.loc[date, 'runs'],
                stats.hr + self.pitching_stats_df.loc[date, 'hr'],
                stats.bb + self.pitching_stats_df.loc[date, 'bb'],
                stats.so + self.pitching_stats_df.loc[date, 'so'],
                stats.s_h, stats.s_r, seaason_hr, stats.s_so, stats.s_bb,
                stats.l, stats.w, stats.sv,
                stats.er + self.pitching_stats_df.loc[date, 'er'],
                stats.hld + self.pitching_stats_df.loc[date, 'hold'],
                stats.bs + self.pitching_stats_df.loc[date, 'blown_saves'],
                stats.out + self.pitching_stats_df.loc[date, 'outs_recorded'],
                stats.bf + self.pitching_stats_df.loc[date, 'batters_faced'],
                # game_score is averaged, not summed, across same-day games.
                (stats.game_score + self.pitching_stats_df.loc[date, 'game_score']) / 2,
                stats.era,
                stats.np + self.pitching_stats_df.loc[date, 'num_pitches'],
                win + self.pitching_stats_df.loc[date, 'win'],
                loss + self.pitching_stats_df.loc[date, 'loss'],
                save + self.pitching_stats_df.loc[date, 'save'],
                stats.s_er, stats.s_ip,
                stats.s + self.pitching_stats_df.loc[date, 's']
            ]
        else:
            # First appearance on this date: write the row positionally.
            self.pitching_stats_df.loc[date] = [
                stats.h, stats.r, stats.hr, stats.bb, stats.so, stats.s_h,
                stats.s_r, seaason_hr, stats.s_so, stats.s_bb, stats.l,
                stats.w, stats.sv, stats.er, stats.hld, stats.bs, stats.out,
                stats.bf, stats.game_score, stats.era, stats.np, win, loss,
                save, stats.s_er, stats.s_ip, stats.s
            ]

    def add_batting_stats(self, stats, date):
        """Record (or merge) one batting line for `date`."""
        # Optional fields default to 0 when absent from the stats object.
        go = stats.go if (hasattr(stats, 'go')) else 0
        bo = stats.bo if (hasattr(stats, 'bo')) else 0
        slg = stats.slg if (hasattr(stats, 'slg')) else 0
        obp = stats.obp if (hasattr(stats, 'obp')) else 0
        ops = stats.ops if (hasattr(stats, 'ops')) else 0
        if (not self.batting_stats_df.empty and self.batting_stats_df.last_valid_index() == date):
            # Second appearance on the same date: merge into the existing row.
            print('same day', date)
            self.batting_stats_df.loc[date] = [
                self.batting_stats_df.loc[date, 'at_bats'] + stats.ab,
                stats.avg,
                stats.h + self.batting_stats_df.loc[date, 'hits'],
                stats.r + self.batting_stats_df.loc[date, 'runs'],
                stats.rbi + self.batting_stats_df.loc[date, 'rbi'],
                stats.hr + self.batting_stats_df.loc[date, 'hr'],
                slg, obp, ops,
                # fldg is averaged, not summed, across same-day games.
                (stats.fldg + self.batting_stats_df.loc[date, 'fldg']) / 2,
                bo,
                stats.bb + self.batting_stats_df.loc[date, 'bb'],
                stats.sb + self.batting_stats_df.loc[date, 'sb'],
                stats.cs + self.batting_stats_df.loc[date, 'cs'],
                stats.e + self.batting_stats_df.loc[date, 'e'],
                stats.hbp + self.batting_stats_df.loc[date, 'hbp'],
                stats.so + self.batting_stats_df.loc[date, 'so'],
                stats.sac + self.batting_stats_df.loc[date, 'sac'],
                stats.sf + self.batting_stats_df.loc[date, 'sf'],
                stats.lob + self.batting_stats_df.loc[date, 'lob'],
                stats.ao + self.batting_stats_df.loc[date, 'fly_outs'],
                stats.po + self.batting_stats_df.loc[date, 'put_outs'],
                stats.a + self.batting_stats_df.loc[date, 'assists'],
                go + self.batting_stats_df.loc[date, 'ground_outs'],
                stats.s_h, stats.s_r, stats.s_hr, stats.s_rbi, stats.s_so,
                stats.s_bb,
                stats.d + self.batting_stats_df.loc[date, 'd'],
                stats.t + self.batting_stats_df.loc[date, 't']
            ]
        else:
            # First appearance on this date: write the row positionally.
            self.batting_stats_df.loc[date] = [
                stats.ab, stats.avg, stats.h, stats.r, stats.rbi, stats.hr,
                slg, obp, ops, stats.fldg, bo, stats.bb, stats.sb, stats.cs,
                stats.e, stats.hbp, stats.so, stats.sac, stats.sf, stats.lob,
                stats.ao, stats.po, stats.a, go, stats.s_h, stats.s_r,
                stats.s_hr, stats.s_rbi, stats.s_so, stats.s_bb, stats.d,
                stats.t
            ]

    def name(self):
        # Full display name.
        return self.first_name + " " + self.last_name
# Evaluate the already-fitted classifier on the held-out split.
# NOTE(review): both metric results are discarded — presumably run
# interactively (notebook); assign or print them in script use.
sklearn.metrics.confusion_matrix(tar_test, predictions)
sklearn.metrics.accuracy_score(tar_test, predictions)

# fit an Extra Trees model to the data
model = ExtraTreesClassifier()
model.fit(pred_train, tar_train)

# display the relative importance of each attribute
var_name = (pred_train.columns.tolist())
var_sig = (list(model.feature_importances_))

# combine to 1 data frame (one row 'Imp' of importances, one column per feature)
# NOTE(review): range(24) hard-codes the feature count — assumes pred_train
# has exactly 24 columns; verify against the dataset.
var_imp = DataFrame(columns=var_name)
var_imp.loc['Imp'] = [list(model.feature_importances_)[n] for n in range(24)]

# sort data frame by importance, descending
# Bug fix: DataFrame.ix was removed from pandas; .loc does the same
# label-based row lookup here. The reordered frame is still discarded,
# matching the original (interactive display) behaviour.
var_imp[var_imp.loc[var_imp.last_valid_index()].argsort()[::-1]]

"""
Running a different number of trees and see the effect
of that on the accuracy of the prediction
"""

trees = range(25)
accuracy = np.zeros(25)

# Refit a forest for each tree count and record held-out accuracy.
for idx in range(len(trees)):
    classifier = RandomForestClassifier(n_estimators=idx + 1)
    classifier = classifier.fit(pred_train, tar_train)
    predictions = classifier.predict(pred_test)
    accuracy[idx] = sklearn.metrics.accuracy_score(tar_test, predictions)

plt.cla()
def test_first_last_valid_frame(self, data, idx, expected_first, expected_last): # GH#21441 df = DataFrame(data, index=idx) assert expected_first == df.first_valid_index() assert expected_last == df.last_valid_index()
class Player2:
    """Per-player accumulator of daily batting and pitching stat lines.

    Rows are keyed by game date. A repeated date (doubleheader) merges into
    the existing row: counting stats add up, season totals overwrite, and
    game_score / fldg are averaged.
    """

    def __init__(self, first_name, last_name, id):
        self.first_name = first_name
        self.last_name = last_name
        self.id = id
        # Column order is load-bearing: the positional row lists written in
        # add_batting_stats / add_pitching_stats must line up with these.
        self.batting_stats_df = DataFrame(columns=["at_bats", "avg", "hits", "runs", "rbi", "hr", "slg", "obp", "ops", "fldg", "batting_order", "bb", "sb", "cs", "e", "hbp", "so", "sac", "sf", "lob", "fly_outs", "put_outs", "assists", "ground_outs", "season_hits", "season_runs", "season_hr", "season_rbi", "season_so", "season_bb", "d", "t"])
        self.pitching_stats_df = DataFrame(columns=["hits", "runs", "hr", "bb", "so", "season_hits", "season_runs", "season_hr", "season_so", "season_bb", "loses", "wins", "saves", "er", "hold", "blown_saves", "outs_recorded", "batters_faced", "game_score", "era", "num_pitches", "win", "loss", "save", "season_er", "season_ip", "s"])

    def __str__(self):
        return str.format('{0} : {1}', self.id, self.last_name)

    def __repr__(self):
        return self.__str__()

    def add_stats(self, stats, date):
        # A stats object with an 'ab' (at-bats) attribute is treated as a
        # batting line; anything else as pitching — TODO confirm upstream.
        if(hasattr(stats, 'ab')):
            self.add_batting_stats(stats, date)
        else:
            self.add_pitching_stats(stats, date)

    def add_pitching_stats(self, stats, date):
        """Record or same-day-merge one pitching line for `date`."""
        # Optional attributes fall back to 0 / '' when missing.
        win = 1 if (hasattr(stats, 'win') and stats.win) else 0
        loss = 1 if (hasattr(stats, 'loss') and stats.loss) else 0
        save = 1 if (hasattr(stats, 'save') and stats.save) else 0
        seaason_hr = stats.s_hr if (hasattr(stats, 's_hr')) else 0  # NOTE(review): misspelled name kept as-is
        note = stats.note if(hasattr(stats, 'note')) else ''  # NOTE(review): never used afterwards
        if(not self.pitching_stats_df.empty and self.pitching_stats_df.last_valid_index() == date):
            # Same-date entry already exists: merge rather than overwrite.
            print('same day', date)
            self.pitching_stats_df.loc[date] = [
                stats.h + self.pitching_stats_df.loc[date, 'hits'],
                stats.r + self.pitching_stats_df.loc[date, 'runs'],
                stats.hr + self.pitching_stats_df.loc[date, 'hr'],
                stats.bb + self.pitching_stats_df.loc[date, 'bb'],
                stats.so + self.pitching_stats_df.loc[date, 'so'],
                stats.s_h, stats.s_r, seaason_hr, stats.s_so, stats.s_bb,
                stats.l, stats.w, stats.sv,
                stats.er + self.pitching_stats_df.loc[date, 'er'],
                stats.hld + self.pitching_stats_df.loc[date, 'hold'],
                stats.bs + self.pitching_stats_df.loc[date, 'blown_saves'],
                stats.out + self.pitching_stats_df.loc[date, 'outs_recorded'],
                stats.bf + self.pitching_stats_df.loc[date, 'batters_faced'],
                # Averaged across the two same-day games.
                (stats.game_score + self.pitching_stats_df.loc[date, 'game_score'])/2,
                stats.era,
                stats.np + self.pitching_stats_df.loc[date, 'num_pitches'],
                win + self.pitching_stats_df.loc[date, 'win'],
                loss + self.pitching_stats_df.loc[date, 'loss'],
                save + self.pitching_stats_df.loc[date, 'save'],
                stats.s_er, stats.s_ip,
                stats.s + self.pitching_stats_df.loc[date, 's']
            ]
        else:
            # Fresh date: positional row matching pitching_stats_df columns.
            self.pitching_stats_df.loc[date] = [stats.h, stats.r, stats.hr,
                stats.bb, stats.so, stats.s_h, stats.s_r, seaason_hr,
                stats.s_so, stats.s_bb, stats.l, stats.w, stats.sv, stats.er,
                stats.hld, stats.bs, stats.out, stats.bf, stats.game_score,
                stats.era, stats.np, win, loss, save, stats.s_er, stats.s_ip,
                stats.s]

    def add_batting_stats(self, stats, date):
        """Record or same-day-merge one batting line for `date`."""
        # Optional attributes fall back to 0 when missing.
        go = stats.go if (hasattr(stats, 'go')) else 0
        bo = stats.bo if (hasattr(stats, 'bo')) else 0
        slg = stats.slg if(hasattr(stats, 'slg')) else 0
        obp = stats.obp if(hasattr(stats, 'obp')) else 0
        ops = stats.ops if(hasattr(stats, 'ops')) else 0
        if(not self.batting_stats_df.empty and self.batting_stats_df.last_valid_index() == date):
            # Same-date entry already exists: merge rather than overwrite.
            print('same day', date)
            self.batting_stats_df.loc[date] = [
                self.batting_stats_df.loc[date, 'at_bats'] + stats.ab,
                stats.avg,
                stats.h + self.batting_stats_df.loc[date, 'hits'],
                stats.r + self.batting_stats_df.loc[date, 'runs'],
                stats.rbi + self.batting_stats_df.loc[date, 'rbi'],
                stats.hr + self.batting_stats_df.loc[date, 'hr'],
                slg, obp, ops,
                # Averaged across the two same-day games.
                (stats.fldg + self.batting_stats_df.loc[date, 'fldg'])/2,
                bo,
                stats.bb + self.batting_stats_df.loc[date, 'bb'],
                stats.sb + self.batting_stats_df.loc[date, 'sb'],
                stats.cs + self.batting_stats_df.loc[date, 'cs'],
                stats.e + self.batting_stats_df.loc[date, 'e'],
                stats.hbp + self.batting_stats_df.loc[date, 'hbp'],
                stats.so + self.batting_stats_df.loc[date, 'so'],
                stats.sac + self.batting_stats_df.loc[date, 'sac'],
                stats.sf + self.batting_stats_df.loc[date, 'sf'],
                stats.lob + self.batting_stats_df.loc[date, 'lob'],
                stats.ao + self.batting_stats_df.loc[date, 'fly_outs'],
                stats.po + self.batting_stats_df.loc[date, 'put_outs'],
                stats.a + self.batting_stats_df.loc[date, 'assists'],
                go + self.batting_stats_df.loc[date, 'ground_outs'],
                stats.s_h, stats.s_r, stats.s_hr, stats.s_rbi, stats.s_so,
                stats.s_bb,
                stats.d + self.batting_stats_df.loc[date, 'd'],
                stats.t + self.batting_stats_df.loc[date, 't']
            ]
        else:
            # Fresh date: positional row matching batting_stats_df columns.
            self.batting_stats_df.loc[date] = [stats.ab, stats.avg, stats.h,
                stats.r, stats.rbi, stats.hr, slg, obp, ops, stats.fldg, bo,
                stats.bb, stats.sb, stats.cs, stats.e, stats.hbp, stats.so,
                stats.sac, stats.sf, stats.lob, stats.ao, stats.po, stats.a,
                go, stats.s_h, stats.s_r, stats.s_hr, stats.s_rbi,
                stats.s_so, stats.s_bb, stats.d, stats.t]

    def name(self):
        # Full display name.
        return self.first_name + " " + self.last_name