def clean_frame(df):
    """Clean the funding frame and add the derived columns.

    Steps, in order:

    1. Run ``clean.map_remove`` with ``clean.neg_num`` over the
       ``amount_raised``, ``amount_goal``, ``category_id`` and
       ``platform_id`` columns (intent per the original comment:
       "remove negative nums").
    2. Run ``outlier.remove_outliers`` with 5 as its second argument on
       ``amount_goal`` and then ``amount_raised``.
    3. Add ``amount_needed`` as ``amount_goal - amount_raised``.
    4. Add ``completed``, a boolean column that is True where
       ``amount_needed`` equals 0.

    Args:
        df: Frame holding the four columns listed in step 1
            (presumably a pandas DataFrame -- confirm against callers).

    Returns:
        The frame produced by the helpers, with ``amount_needed`` and
        ``completed`` added.
    """
    # NOTE(review): the original ran the amount_raised / amount_goal
    # passes twice. The repeat is dropped on the assumption that
    # map_remove is a plain filter (a second pass finds nothing) -- confirm.
    for column in ('amount_raised', 'amount_goal',
                   'category_id', 'platform_id'):
        df = clean.map_remove(df, column, clean.neg_num)

    for column in ('amount_goal', 'amount_raised'):
        df = outlier.remove_outliers(df, 5, column)

    df['amount_needed'] = df['amount_goal'] - df['amount_raised']

    # Item assignment is needed to create a column: ``df.completed = ...``
    # only sets a plain Python attribute on the frame. The original also
    # wrapped the mask in a one-element list, which is dropped here.
    df['completed'] = df['amount_needed'] == 0

    # TODO: tokenize the description keywords with NLTK
    # (keyw.tokenizeElements(df, 'description')).
    return df
def test_raise_keyerror_when_index_not_in_dataframe(self):
    """remove_outliers raises KeyError for a column name not in the frame."""
    # The call must happen inside assertRaises. Passing the *result* of
    # the call (as the original did) raises before assertRaises gets
    # control, so the test errors out instead of passing.
    with self.assertRaises(KeyError):
        v.remove_outliers(df, 4, 'qwerty')
def test_delete_row_containing_300(self):
    """With 2 as the second argument, the 300 entry is dropped from 'Data'."""
    # Example dataset with one outlier.
    df2 = pd.DataFrame({'Data': [1, 2, 3, 4, 9, 300]})

    result = v.remove_outliers(df2, 2, 'Data')

    # Exactly one of the six values is gone...
    self.assertEqual(result.Data.count(), 5)
    # ...and it is the outlier itself, not one of the ordinary values.
    self.assertNotIn(300, result.Data.tolist())
def test_should_accept_int_for_arg2(self):
    """An int second argument yields an object whose class is named 'DataFrame'."""
    result = v.remove_outliers(df, 4, 'Data')
    class_name = type(result).__name__
    self.assertEqual(class_name, 'DataFrame')
def test_should_accept_digit_string_for_arg2(self):
    """A digit-string second argument ('4') still yields a 'DataFrame'."""
    result = v.remove_outliers(df, '4', 'Data')
    class_name = type(result).__name__
    self.assertEqual(class_name, 'DataFrame')
def test_should_raise_attributeerror_for_non_dataframe(self):
    """remove_outliers raises AttributeError when given a str, not a frame."""
    # The call must happen inside assertRaises. Evaluating it as an
    # argument (as the original did) raises before assertRaises gets
    # control, so the test errors out instead of passing.
    with self.assertRaises(AttributeError):
        v.remove_outliers('g', 3, 'Data')