def transform(self, df, **transform_params):
    """Print the incoming frame and pass it through unchanged.

    Intended as a debugging step: the frame is validated, written to
    standard output, and returned as-is.

    Args:
        df: The frame to inspect. It is handed to ``is_dataframe`` first,
            which is expected to raise ``NotDataFrame`` for anything that
            is not a DataFrame.
        **transform_params: Accepted for interface compatibility; unused.

    Returns:
        The same ``df`` object that was passed in.
    """
    # sanity checks
    is_dataframe(df)

    # Function-call form works on both Python 2 and Python 3; the bare
    # ``print df`` statement is a SyntaxError on Python 3.
    print(df)
    return df
def transform(self, df, **transform_params):
    """Build a frame of lagged copies of ``df`` placed side by side.

    For every offset ``k`` from 0 through ``self.lag`` (inclusive), ``df``
    is shifted by ``k`` and its column labels receive the suffix
    ``"_L<k>"``. The shifted copies are concatenated column-wise and rows
    with missing values are dropped.

    Args:
        df: Input frame; validated with ``is_dataframe`` before use.
        **transform_params: Accepted for interface compatibility; unused.

    Returns:
        The concatenated, NaN-free frame. If no row survives the drop the
        result is simply an empty frame.
    """
    # sanity checks
    is_dataframe(df)

    lagged_frames = []
    for offset in range(self.lag + 1):
        suffix = "_L" + str(offset)
        lagged_frames.append(df.shift(offset).add_suffix(suffix))

    combined = pd.concat(lagged_frames, axis=1)

    # NOTE(review): an empty result is returned silently when the lag
    # window leaves no complete rows -- should that be covered by a test?
    return combined.dropna()
def test_is_dataframe(self):
    """Check ``is_dataframe`` rejects non-frames and accepts real frames."""
    # A non-DataFrame fixture must trigger NotDataFrame.
    bad_input = self.not_a_df
    with self.assertRaises(NotDataFrame):
        is_dataframe(bad_input)

    # df1 is a genuine dataframe, so validation must pass silently;
    # an exception here indicates a problem in is_dataframe itself.
    good_input = self.df1
    try:
        is_dataframe(good_input)
    except NotDataFrame:
        self.fail("is_dataframe() raised NotDataFrame unexpectedly!")
def transform(self, df, **transform_params):
    """Select the columns named in ``self.col_labels`` from ``df``.

    Args:
        df: Input frame; validated with ``is_dataframe`` before use.
        **transform_params: Accepted for interface compatibility; unused.

    Returns:
        The result of indexing ``df`` with ``self.col_labels``.
    """
    # sanity checks
    is_dataframe(df)

    # presumably raises when a requested label is absent from the frame;
    # verify against the is_column_subset helper
    available_labels = df.columns.tolist()
    is_column_subset(self.col_labels, available_labels)

    # extract columns by label
    selected = df[self.col_labels]
    return selected
def transform(self, df, **transform_params):
    """Compute the relative change from the first column to the second.

    The result is ``(second - first) / first``, evaluated element-wise on
    the columns at positions 0 and 1, wrapped in a new DataFrame.

    Args:
        df: Input frame; validated with ``is_dataframe`` and then with
            ``is_correct_length(df, 2)`` (presumably a two-column check --
            confirm against that helper).
        **transform_params: Accepted for interface compatibility; unused.

    Returns:
        A DataFrame holding the element-wise relative change.
    """
    # sanity checks
    is_dataframe(df)
    is_correct_length(df, 2)

    baseline = df.iloc[:, 0]
    follow_up = df.iloc[:, 1]

    # NOTE(review): no explicit guard against zeros in the baseline column.
    # Unlikely with real data, but should odd input be rejected up front?
    difference = follow_up.sub(baseline)
    relative_change = difference.div(baseline)
    return pd.DataFrame(relative_change)