def clean_frame(df):
    """Clean a campaign frame and add the derived columns.

    Steps, in order:
      1. Filter rows via ``clean.map_remove`` with ``clean.neg_num`` on the
         amount and id columns (intended to drop negative numbers).
      2. Filter outliers in ``amount_goal`` and ``amount_raised`` via
         ``outlier.remove_outliers``.
      3. Add ``amount_needed`` (goal minus raised) and the boolean
         ``completed`` column (True where nothing more is needed).

    Args:
        df: frame with at least the columns ``amount_raised``,
            ``amount_goal``, ``category_id`` and ``platform_id``.

    Returns:
        The frame returned by the helper chain, with ``amount_needed`` and
        ``completed`` columns added.
    """
    # Remove negative numbers. Each column is filtered once: assuming
    # map_remove is a pure row filter, repeating it on an already filtered
    # column is a no-op -- TODO confirm against clean.map_remove.
    for column in ('amount_raised', 'amount_goal', 'category_id', 'platform_id'):
        df = clean.map_remove(df, column, clean.neg_num)

    # Remove outliers. NOTE(review): the meaning of the literal 5 is defined
    # by outlier.remove_outliers (presumably a threshold) -- confirm there.
    for column in ('amount_goal', 'amount_raised'):
        df = outlier.remove_outliers(df, 5, column)

    # Amount still needed to reach the goal.
    df['amount_needed'] = df.amount_goal - df.amount_raised

    # A new column has to be created with item assignment; assigning to
    # ``df.completed`` only sets a Python attribute on the object, and the
    # value must be the boolean Series itself, not a list wrapping it.
    df['completed'] = df.amount_needed == 0

    # TODO: keyword tokenization of 'description' with NLTK is disabled:
    # df.description = keyw.tokenizeElements(df, 'description')
    return df
def test_returns_dataframe_arg1(self):
    """map_remove on a valid column returns an object of class DataFrame."""
    result = m.map_remove(df2, 'goal', m.neg_num)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(result.__class__.__name__, 'DataFrame')
def test_raise_IndexError_for_out_of_range(self):
    """map_remove raises IndexError when the column index (90) is out of range."""
    # The call must run inside the assertRaises context; invoking it first
    # and passing the result lets the exception escape the assertion.
    with self.assertRaises(IndexError):
        m.map_remove(df, 90, m.neg_num)
def test_raise_TypeError_for_non__arg2(self):
    """map_remove raises TypeError when the column argument is a list."""
    # The call must run inside the assertRaises context; invoking it first
    # and passing the result lets the exception escape the assertion.
    with self.assertRaises(TypeError):
        m.map_remove(df, [2, 2], m.neg_num)
def test_raise_TypeError_for_non_dataframe_arg1(self):
    """map_remove raises TypeError when the first argument is a string."""
    # The call must run inside the assertRaises context; invoking it first
    # and passing the result lets the exception escape the assertion.
    with self.assertRaises(TypeError):
        m.map_remove('qwerty', 3, m.neg_num)
def test_should_remove_rows_where_function_true_for_col(self):
    """map_remove leaves the expected number of rows for columns 0, 6 and 2."""
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(len(m.map_remove(df, 0, m.neg_num)), 3)
    self.assertEqual(len(m.map_remove(df, 6, m.neg_num)), 4)
    self.assertEqual(len(m.map_remove(df, 2, m.neg_num)), 3)