def clean_frame(df):
    """Clean *df* and add the ``amount_needed`` and ``completed`` columns.

    Steps, in order:

    1. Pass the amount and id columns through ``clean.map_remove`` with
       ``clean.neg_num`` (used here to remove negative numbers).
    2. Run ``outlier.remove_outliers`` on ``amount_goal`` and then on
       ``amount_raised`` with a threshold argument of 5.
    3. Add ``amount_needed`` as ``amount_goal - amount_raised``.
    4. Add ``completed``, a boolean column that is True where
       ``amount_needed`` equals 0.

    Args:
        df: pandas DataFrame containing the columns ``amount_raised``,
            ``amount_goal``, ``category_id`` and ``platform_id``.

    Returns:
        The frame produced by the cleaning helpers, with the
        ``amount_needed`` and ``completed`` columns added.
    """
    # remove negative nums
    negative_checked_columns = (
        'amount_raised',
        'amount_goal',
        'category_id',
        'platform_id',
        # NOTE(review): the two amount columns are processed a second time,
        # exactly as before. This looks redundant -- confirm that
        # clean.map_remove is idempotent before dropping the repeats.
        'amount_raised',
        'amount_goal',
    )
    for column in negative_checked_columns:
        df = clean.map_remove(df, column, clean.neg_num)

    # remove outliers
    # NOTE(review): 5 is the threshold handed to outlier.remove_outliers;
    # presumably a standard-deviation multiple -- confirm against the helper.
    for column in ('amount_goal', 'amount_raised'):
        df = outlier.remove_outliers(df, 5, column)

    # add amount needed column to df
    df['amount_needed'] = df['amount_goal'] - df['amount_raised']

    # add completed column
    # Item assignment is required here: attribute assignment
    # (``df.completed = ...``) does not create a new column, and the
    # comparison must stay a per-row boolean Series rather than being wrapped
    # in a one-element list.
    # NOTE(review): rows where amount_raised exceeds amount_goal have a
    # negative amount_needed and are therefore not marked completed --
    # confirm this is intended.
    df['completed'] = df['amount_needed'] == 0

    # tokenize keywords using NLTK library (currently disabled)
    #df.description = keyw.tokenizeElements(df, 'description')

    return df
 def test_raise_keyerror_when_index_not_in_dataframe(self):
     """remove_outliers should raise KeyError for the column 'qwerty'.

     'qwerty' is presumably absent from the shared ``df`` fixture -- verify
     against the fixture definition.
     """
     # The call has to happen inside the assertRaises context. Evaluating
     # v.remove_outliers(...) as an argument would raise before assertRaises
     # could intercept the exception.
     with self.assertRaises(KeyError):
         v.remove_outliers(df, 4, 'qwerty')
 def test_delete_row_containing_300(self):
     """With threshold 2, five of the six 'Data' values should remain."""
     # example dataset with one outlier
     df2 = pd.DataFrame({'Data': [1, 2, 3, 4, 9, 300]})
     filtered = v.remove_outliers(df2, 2, 'Data')
     self.assertEqual(filtered.Data.count(), 5)
 def test_should_accept_int_for_arg2(self):
     """An int second argument should yield an object whose class is named 'DataFrame'."""
     outcome = v.remove_outliers(df, 4, 'Data')
     class_name = outcome.__class__.__name__
     self.assertEqual(class_name, 'DataFrame')
 def test_should_accept_digit_string_for_arg2(self):
     """A digit-string second argument ('4') should yield an object whose class is named 'DataFrame'."""
     outcome = v.remove_outliers(df, '4', 'Data')
     class_name = outcome.__class__.__name__
     self.assertEqual(class_name, 'DataFrame')
 def test_should_raise_attributeerror_for_non_dataframe(self):
     """remove_outliers should raise AttributeError when given the string 'g' instead of a frame."""
     # The call has to happen inside the assertRaises context. Evaluating
     # v.remove_outliers(...) as an argument would raise before assertRaises
     # could intercept the exception.
     with self.assertRaises(AttributeError):
         v.remove_outliers('g', 3, 'Data')