def to_datetime(self, format="%Y-%m-%d %H:%M:%S", col=None):
    """Convert a column in place with ``pd.to_datetime``.

    Args:
        format (str): accepted for signature compatibility; this method
            does not pass it to ``pd.to_datetime``, so parsing relies on
            pandas' own format inference.
        col (str): name of the column to convert. When ``None`` the
            column named by ``self._time_field`` is used.
    """
    target = self._time_field if col is None else col
    logging.debug("Running pd.to_datetime on {} ".format(target))
    converted = pd.to_datetime(self.df[target])
    self.df[target] = converted
def normalize_time_to_datetime(self, format="%Y-%m-%d %H:%M:%S", col=None):
    """Parse a column of timestamp strings into datetimes, in place.

    Every value of the column is parsed with
    ``datetime.datetime.strptime`` using ``format``.

    Args:
        format (str): ``strptime`` format string each value must match.
        col (str): name of the column to convert. When ``None`` the
            column named by ``self._time_field`` is used.

    Raises:
        ValueError: propagated from ``strptime`` when a value does not
            match ``format``.
    """
    if col is None:
        col = self._time_field
    logging.debug("setting {} to datetime ".format(col))
    # Series.apply has no axis argument; the former positional ``1`` was
    # bound to ``convert_dtype`` (deprecated in newer pandas), so it is
    # omitted here without changing the result.
    self.df[col] = self.df[col].apply(
        lambda value: datetime.datetime.strptime(value, format))
def map_column(self, col, func):
    """Apply ``func`` to every value of a column, in place.

    If the column does not exist yet it is first created with the value
    ``'unknown'`` in every row, and ``func`` is then applied to that
    placeholder.

    Args:
        col (str): name of the column to transform.
        func (callable): called once per value; its return value replaces
            the original value.
    """
    logging.debug("Mapping col:{}".format(col))
    if col not in self.df.columns:
        self.df[col] = 'unknown'
    # Series.apply has no axis argument; the former positional ``1`` was
    # bound to ``convert_dtype`` (deprecated in newer pandas), so it is
    # omitted here without changing the result.
    self.df[col] = self.df[col].apply(func)
def epoch_time_to_datetime(self, col=None):
    """Convert a column of epoch seconds to pandas timestamps, in place.

    Each value is converted individually with
    ``pd.to_datetime(value, unit='s')``.

    Args:
        col (str): name of the column to convert. When ``None`` the
            column named by ``self._time_field`` is used.
    """
    if col is None:
        col = self._time_field
    logging.debug("Running to_datetime on {} ".format(col))
    # Series.apply has no axis argument; the former positional ``1`` was
    # bound to ``convert_dtype`` (deprecated in newer pandas), so it is
    # omitted here without changing the result.
    self.df[col] = self.df[col].apply(
        lambda seconds: pd.to_datetime(seconds, unit='s'))
def copy_column(self, to, fro):
    """Assign the data of one column to another column.

    The destination column is created if absent and overwritten if it
    already exists.

    Args:
        to (str): name of the column that receives the data.
        fro (str): name of the column the data is read from.
    """
    logging.debug("copying {} to {}".format(fro, to))
    source_values = self.df[fro]
    self.df[to] = source_values
def trim_timeseries(self, start, stop):
    """Keep only the rows whose time value lies within [start, stop].

    Both bounds are inclusive. The comparison is made against the column
    named by ``self._time_field`` and ``self.df`` is replaced by the
    filtered frame.

    Args:
        start (datetime): start of the time-series trim.
        stop (datetime): stop of the time-series trim.
    """
    logging.debug("trim_timeseries start:{} stop:{} ".format(start, stop))
    times = self.df[self._time_field]
    not_before_start = times >= start
    not_after_stop = times <= stop
    self.df = self.df.loc[not_before_start & not_after_stop, :]
def ensure_columns(self, std):
    """Make the frame match a column-name -> dtype specification.

    For each column named in ``std``: if it is missing from ``self.df`` it
    is created (filled with NaN when the requested dtype is the string
    ``'float64'``, otherwise with ``None``), and it is then cast with
    ``astype``. Finally ``self.df`` is reduced to exactly the columns in
    ``std``, in the order ``std`` lists them.

    Args:
        std (dict): maps column name to the dtype handed to ``astype``.
    """
    wanted = list(std.keys())
    for name in wanted:
        dtype = std[name]
        logging.debug("checking {} is astype {} ".format(name, dtype))
        if name not in self.df.columns:
            # Placeholder for a column the data did not provide.
            self.df[name] = float('nan') if dtype == 'float64' else None
        self.df[name] = self.df[name].astype(dtype)
    self.df = self.df[wanted]
def rbind(self, tt):
    """Append the rows of another object's frame to this one.

    ``self.df`` is replaced by a new frame holding the rows of ``self.df``
    followed by the rows of ``tt.df``. Row index labels are kept as they
    are, so labels may repeat.

    Args:
        tt: object exposing a ``df`` attribute (a DataFrame) whose rows
            are appended.
    """
    logging.debug("appending tt.df")
    # DataFrame.append was removed in pandas 2.0; pd.concat with default
    # arguments yields the same result.
    self.df = pd.concat([self.df, tt.df])
def unique(self, col):
    """Return the distinct values found in a column.

    Args:
        col (str): name of the column to inspect.

    Returns:
        The result of calling ``unique()`` on that column of ``self.df``.
    """
    logging.debug("unique col:{}".format(col))
    column = self.df[col]
    return column.unique()
def pandas_merge(self, right, right_cols, how='left', on='id'):
    """Merge selected columns of another frame into ``self.df``.

    ``self.df`` is replaced by the result of merging it with
    ``right.loc[:, right_cols]``.

    Args:
        right: frame to merge in; it must support ``.loc[:, right_cols]``
            (presumably a pandas DataFrame -- confirm against callers).
        right_cols (list): columns of ``right`` that take part in the
            merge.
        how (str): merge strategy forwarded to ``DataFrame.merge``.
        on (str): join key forwarded to ``DataFrame.merge``.
    """
    # Lazy logging arguments: the (potentially large) frame is only
    # rendered to text when debug logging is actually enabled. The emitted
    # message is the same as before.
    logging.debug("merging right:%s right_cols:%s ", right, right_cols)
    self.df = self.df.merge(right.loc[:, right_cols], how=how, on=on)
def rbind(self, tt):
    """Append the rows of another object's frame to this one.

    ``self.df`` is replaced by a new frame holding the rows of ``self.df``
    followed by the rows of ``tt.df``. Row index labels are kept as they
    are, so labels may repeat.

    Args:
        tt: object exposing a ``df`` attribute (a DataFrame) whose rows
            are appended.
    """
    # NOTE(review): an identical ``rbind`` definition appears earlier in
    # this file; if both live in the same scope this later one wins --
    # confirm and remove one of them.
    logging.debug("appending tt.df")
    # DataFrame.append was removed in pandas 2.0; pd.concat with default
    # arguments yields the same result.
    self.df = pd.concat([self.df, tt.df])