def __init__(self, fname, category=None, start_date=None, end_date=None):
    """Load the transactions in ``fname`` and prepare the analysis state.

    Args:
        fname: Passed to ``CapitalOneParser`` as the data source.
        category: Forwarded to ``MerchantParser`` as its ``category`` keyword.
        start_date: Forwarded to ``CapitalOneParser`` as ``start_date``.
        end_date: Forwarded to ``CapitalOneParser`` as ``end_date``.
    """
    parser = CapitalOneParser(fname,
                              start_date=start_date,
                              end_date=end_date)
    self.parser = parser
    self.merchant_parser = MerchantParser(parser.get_dataframe(),
                                          category=category)
    self.categories = parser.get_categories()
    self.cardholders = parser.get_cardholders()

    # The parser's date bounds are converted with ``to_pydatetime()`` before
    # being stored and handed to TransactionDateUtil.
    earliest = parser.get_min_date().to_pydatetime()
    latest = parser.get_max_date().to_pydatetime()
    self.min_date = earliest
    self.max_date = latest

    # Every period count is floored at 1.
    month_span = TransactionDateUtil.get_num_months_between_dates(
        earliest, latest)
    self.num_months = max(month_span, 1)
    week_span = TransactionDateUtil.get_num_weeks_between_dates(
        earliest, latest)
    self.num_weeks = max(week_span, 1)
    day_span = TransactionDateUtil.get_num_days_between_dates(
        earliest, latest)
    self.num_days = max(day_span, 1)

    # Result slots start out empty (None).
    self.spending_per_category = None
    self.spending_per_category_per_cardholder = None
    self.percent_per_category_per_cardholder = None
    self.percentage_per_category = None
    self.total_spent = self.total_spent_per_cardholder = None
def __init__(self, fname, start_date=None, end_date=None):
    """Load ``fname`` and create the helpers used to clean merchant names.

    Args:
        fname: Passed to ``CapitalOneParser`` as the data source.
        start_date: Forwarded to ``CapitalOneParser`` as ``start_date``.
        end_date: Forwarded to ``CapitalOneParser`` as ``end_date``.
    """
    parser = CapitalOneParser(fname,
                              start_date=start_date,
                              end_date=end_date)
    frame = parser.get_dataframe()

    self.parser = parser
    self.df = frame
    self.merchant_parser = MerchantParser(frame)
    self.merchant_cleaner = MerchantStringCleaner()
    self.merchant_matcher = MerchantDescriptionMatcher()
    # NOTE(review): presumably the position of the description column in
    # the data -- confirm against the code that reads this attribute.
    self.description_column_index = 3
def merge_data(initial_fname, csvs, output):
    """Add the data of every file in ``csvs`` to ``initial_fname``'s data.

    Each file is loaded with ``CapitalOneParser`` and its dataframe is passed
    to ``add_data`` on the parser built from ``initial_fname``. The combined
    parser is then written to ``output``.

    Args:
        initial_fname: File whose parser receives all the other data.
        csvs: Iterable of file names to merge in, processed in order.
        output: Passed to the combined parser's ``write``.
    """
    merged = CapitalOneParser(initial_fname)
    for fname in csvs:
        incoming = CapitalOneParser(fname)
        logging.debug(f"Merging data from {fname} into {initial_fname}")
        incoming_frame = incoming.get_dataframe()
        merged.add_data(incoming_frame)
    merged.write(output)
class CapitalOneCleaner:
    """Rewrite the description column of a parsed file with cleaned names."""

    def __init__(self, fname, start_date=None, end_date=None):
        """Load ``fname`` and create the cleaning and matching helpers.

        Args:
            fname: Passed to ``CapitalOneParser`` as the data source.
            start_date: Forwarded to ``CapitalOneParser`` as ``start_date``.
            end_date: Forwarded to ``CapitalOneParser`` as ``end_date``.
        """
        parser = CapitalOneParser(fname,
                                  start_date=start_date,
                                  end_date=end_date)
        frame = parser.get_dataframe()

        self.parser = parser
        self.df = frame
        self.merchant_parser = MerchantParser(frame)
        self.merchant_cleaner = MerchantStringCleaner()
        self.merchant_matcher = MerchantDescriptionMatcher()
        # Not read anywhere inside this class.
        self.description_column_index = 3

    def clean(self, outfile):
        """Clean and match every description, then save the frame as CSV.

        Each value of the ``DESCRIPTION`` column is passed through
        ``merchant_cleaner.clean_merchant`` and then through
        ``merchant_matcher.find_closest_match`` together with the retailers
        reported by ``merchant_parser``. The column is overwritten in
        ``self.df`` and the frame is written to ``outfile`` without its index.

        Args:
            outfile: Destination handed to ``DataFrame.to_csv``.
        """
        known_retailers = self.merchant_parser.get_retailers()

        def match_to_known_retailer(description):
            return self.merchant_matcher.find_closest_match(
                description, known_retailers)

        tidied = self.df[DESCRIPTION].apply(
            self.merchant_cleaner.clean_merchant)
        self.df[DESCRIPTION] = tidied.apply(match_to_known_retailer)
        self.df.to_csv(outfile, index=False)
def __init__(self, filename, start, finish):
    """Remember ``filename`` and load its data and category list.

    Args:
        filename: Source file; also stored on ``self.filename``.
        start: Forwarded to ``CapitalOneParser`` as ``start_date``.
        finish: Forwarded to ``CapitalOneParser`` as ``end_date``.
    """
    self.filename = filename
    parser = CapitalOneParser(filename, start_date=start, end_date=finish)
    self.parser = parser
    self.categories = parser.get_all_categories()
class Categorizer:
    """Interactive, prompt-driven reassignment of retailers to categories."""

    def __init__(self, filename, start, finish):
        """Remember ``filename`` and load its data and category list.

        Args:
            filename: Source file; also offered later as the overwrite target.
            start: Forwarded to ``CapitalOneParser`` as ``start_date``.
            finish: Forwarded to ``CapitalOneParser`` as ``end_date``.
        """
        self.filename = filename
        parser = CapitalOneParser(filename, start_date=start, end_date=finish)
        self.parser = parser
        self.categories = parser.get_all_categories()

    def categorize(self, category):
        """Recategorize retailers of ``category`` until the user stops.

        Each round refreshes the category list, asks which retailer to move
        and then which category it should get. The loop ends when no
        retailer is returned for ``category`` or the user declines to
        continue. In both cases the data is written afterwards, to the
        target returned by ``OutputHelper.confirm_overwrite``.

        Args:
            category: Category whose retailers are offered for selection.
        """
        while True:
            self.__update_categories()
            chosen_retailer = self.__prompt_retailer(category)
            if not chosen_retailer:
                OutputHelper.echo_no_transactions_in_category(category)
                break
            self.categorize_retailer(chosen_retailer, write=False)
            if not self.__prompt_continue():
                break
        self.__confirm_and_write()

    def categorize_retailer(self, retailer, write=False):
        """Ask for a category and apply it to ``retailer``.

        Args:
            retailer: Retailer passed to
                ``parser.update_categories_for_retailer``.
            write: When true, also confirm the target file and write the
                data immediately.
        """
        chosen_category = self.__prompt_categorize()
        self.parser.update_categories_for_retailer(retailer, chosen_category)
        if not write:
            return
        self.__confirm_and_write()

    def __confirm_and_write(self):
        """Confirm the output target with the user and write the data."""
        target = OutputHelper.confirm_overwrite(self.filename)
        self.parser.write(target)

    def __prompt_categorize(self):
        """Ask for an existing or a new category and return the answer."""

        def wants_existing(answers):
            return answers['existing']

        def wants_new(answers):
            return not answers['existing']

        use_existing_question = {
            'type': 'confirm',
            'name': 'existing',
            'message': 'Would you like to add this to an existing category?',
            'default': True
        }
        # Both follow-up questions store their answer under 'category';
        # the 'when' predicates are complementary.
        pick_existing_question = {
            'type': 'list',
            'name': 'category',
            'message': 'Select an existing category.',
            'choices': self.categories,
            'when': wants_existing
        }
        enter_new_question = {
            'type': 'input',
            'name': 'category',
            'message': 'Enter a new category:',
            'when': wants_new
        }
        answers = prompt([
            use_existing_question, pick_existing_question, enter_new_question
        ])
        return answers["category"]

    def __prompt_retailer(self, category):
        """Let the user pick a retailer of ``category``; None when empty."""
        candidates = self.parser.get_unique_transactions_for_category(
            category)
        if len(candidates) == 0:
            return None
        retailer_question = {
            'type': 'list',
            'name': 'retailer',
            'message': 'Select a retailer to recategorize',
            'choices': candidates
        }
        return prompt([retailer_question])["retailer"]

    def __prompt_continue(self):
        """Ask whether to keep going and return the answer (default no)."""
        continue_question = {
            'type': 'confirm',
            'name': 'continue',
            'message': "Continue categorizing?",
            'default': False
        }
        return prompt([continue_question])["continue"]

    def __update_categories(self):
        """Reload ``self.categories`` from the parser."""
        self.categories = self.parser.get_all_categories()
class CapitalOneAnalyzer:
    """Compute totals, shares and period averages over parsed transactions.

    Aggregations come from a ``CapitalOneParser`` (per category, cardholder,
    income source) and a ``MerchantParser`` (per retailer). Several results
    are computed lazily and cached on the instance. ``None`` marks "not
    computed yet", so falsy results such as ``0`` or ``{}`` count as cached.
    """

    def __init__(self, fname, category=None, start_date=None, end_date=None):
        """Load the transactions in ``fname`` and prepare the analysis state.

        Args:
            fname: Passed to ``CapitalOneParser`` as the data source.
            category: Forwarded to ``MerchantParser`` as ``category``.
            start_date: Forwarded to ``CapitalOneParser`` as ``start_date``.
            end_date: Forwarded to ``CapitalOneParser`` as ``end_date``.
        """
        self.parser = CapitalOneParser(fname,
                                       start_date=start_date,
                                       end_date=end_date)
        self.merchant_parser = MerchantParser(self.parser.get_dataframe(),
                                              category=category)
        self.categories = self.parser.get_categories()
        self.cardholders = self.parser.get_cardholders()
        self.min_date = self.parser.get_min_date().to_pydatetime()
        self.max_date = self.parser.get_max_date().to_pydatetime()

        # Period counts are floored at 1 so the average helpers below never
        # divide by zero.
        self.num_months = max(
            TransactionDateUtil.get_num_months_between_dates(
                self.min_date, self.max_date), 1)
        self.num_weeks = max(
            TransactionDateUtil.get_num_weeks_between_dates(
                self.min_date, self.max_date), 1)
        self.num_days = max(
            TransactionDateUtil.get_num_days_between_dates(
                self.min_date, self.max_date), 1)

        # Lazy caches; None means "not computed yet".
        self.spending_per_category = None
        self.spending_per_category_per_cardholder = None
        self.percent_per_category_per_cardholder = None
        self.percentage_per_category = None
        self.total_spent = None
        self.total_spent_per_cardholder = None

    # ------------------------------------------------------------------
    # Cached aggregates
    # ------------------------------------------------------------------

    def get_spending_per_category(self):
        """Return ``{category: total}``, computing it on first use."""
        if self.spending_per_category is None:
            self.spending_per_category = self.__analyze_per_category()
        return self.spending_per_category

    def get_spending_per_category_per_cardholder(self):
        """Return ``{cardholder: {category: total}}``, computed on first use."""
        if self.spending_per_category_per_cardholder is None:
            self.spending_per_category_per_cardholder = (
                self.__analyze_per_cardholder())
        return self.spending_per_category_per_cardholder

    def get_percentage_per_category(self):
        """Return ``{category: total / total spending}``, computed once."""
        if self.percentage_per_category is None:
            self.percentage_per_category = (
                self.__analyze_percent_per_category())
        return self.percentage_per_category

    def get_percent_per_category_per_cardholder(self):
        """Return ``{cardholder: {category: share of that cardholder's total}}``."""
        if self.percent_per_category_per_cardholder is None:
            self.percent_per_category_per_cardholder = (
                self.__analyze_percent_per_category_per_cardholder())
        return self.percent_per_category_per_cardholder

    # ------------------------------------------------------------------
    # Totals
    # ------------------------------------------------------------------

    def get_total_income(self):
        """Return the parser's total income sum."""
        return self.parser.sum_total_income()

    def get_total_income_per_source(self):
        """Return ``{source: abs(income sum for that source)}``."""
        return {
            source: abs(self.parser.sum_total_income_for_source(source))
            for source in self.parser.get_income_sources()
        }

    def get_total_payment_credit(self):
        """Return the parser's total payment/credit sum."""
        return self.parser.sum_total_payment_credit()

    def get_total_spending(self):
        """Return the parser's total spending, computed and logged once."""
        # ``is None`` rather than truthiness: a total of 0 is a valid cached
        # value and must not trigger another parser query.
        if self.total_spent is None:
            self.total_spent = self.parser.sum_total_spending()
            logging.debug(f"Raw total spent {self.total_spent}")
        return self.total_spent

    def get_total_spending_per_cardholder(self):
        """Return ``{cardholder: total spending}``, computed on first use."""
        if self.total_spent_per_cardholder is None:
            self.total_spent_per_cardholder = {
                cardholder:
                self.parser.sum_total_spending_per_cardholder(cardholder)
                for cardholder in self.cardholders
            }
        return self.total_spent_per_cardholder

    def get_total_spending_for_retailer(self, retailer):
        """Return the merchant parser's sum for ``retailer`` (not cached)."""
        # TODO: also keep this in a cache when there is an interactive mode
        return self.merchant_parser.sum_for_retailer(retailer)

    def get_total_spending_per_retailer(self, order_by):
        """Return ``{retailer: total}`` sorted in descending order.

        Args:
            order_by: ``"number_of_transactions"`` sorts by the merchant
                parser's transaction count per retailer; any other value
                sorts by the total itself.

        Returns:
            A dict whose insertion order is the requested ordering.
        """
        # Takes forever TODO: add a progress bar with click
        totals = {
            retailer: self.merchant_parser.sum_for_retailer(retailer)
            for retailer in self.merchant_parser.get_retailers()
        }

        def by_transaction_count(item):
            return self.merchant_parser.count_for_retailer(item[0])

        def by_total(item):
            return item[1]

        sort_key = (by_transaction_count
                    if order_by == "number_of_transactions" else by_total)
        return dict(sorted(totals.items(), key=sort_key, reverse=True))

    def get_average_and_count_for_retailer(self, retailer):
        """Return ``(total / count, count)`` for ``retailer``.

        Returns ``(0, 0)`` when the merchant parser reports no transactions.
        """
        total = self.get_total_spending_for_retailer(retailer)
        count = self.merchant_parser.count_for_retailer(retailer)
        if count > 0:
            return total / count, count
        return 0, 0

    # ------------------------------------------------------------------
    # Monthly averages
    # ------------------------------------------------------------------

    def get_average_monthly_spending(self):
        """Return total spending divided by ``num_months``."""
        return self.get_total_spending() / self.num_months

    def get_average_monthly_spending_for_category(self, category):
        """Return the category total divided by ``num_months``.

        Raises:
            KeyError: If ``category`` is not in the per-category totals.
        """
        # TODO: need validation around categories
        return self.get_spending_per_category()[category] / self.num_months

    def get_average_monthly_spending_for_retailer(self, retailer):
        """Return the retailer total divided by ``num_months``."""
        return self.get_total_spending_for_retailer(retailer) / self.num_months

    def get_average_monthly_spending_for_cardholder(self, cardholder):
        """Return the cardholder total divided by ``num_months``."""
        totals = self.get_total_spending_per_cardholder()
        return totals[cardholder] / self.num_months

    def get_average_monthly_spending_for_cardholder_for_category(
            self, cardholder, category):
        """Return the cardholder's category total divided by ``num_months``."""
        totals = self.get_spending_per_category_per_cardholder()
        return totals[cardholder][category] / self.num_months

    # ------------------------------------------------------------------
    # Weekly averages
    # ------------------------------------------------------------------

    def get_average_weekly_spending(self):
        """Return total spending divided by ``num_weeks``."""
        return self.get_total_spending() / self.num_weeks

    def get_average_weekly_spending_for_category(self, category):
        """Return the category total divided by ``num_weeks``."""
        return self.get_spending_per_category()[category] / self.num_weeks

    def get_average_weekly_spending_for_retailer(self, retailer):
        """Return the retailer total divided by ``num_weeks``."""
        return self.get_total_spending_for_retailer(retailer) / self.num_weeks

    def get_average_weekly_spending_for_cardholder(self, cardholder):
        """Return the cardholder total divided by ``num_weeks``."""
        totals = self.get_total_spending_per_cardholder()
        return totals[cardholder] / self.num_weeks

    def get_average_weekly_spending_for_cardholder_for_category(
            self, cardholder, category):
        """Return the cardholder's category total divided by ``num_weeks``."""
        totals = self.get_spending_per_category_per_cardholder()
        return totals[cardholder][category] / self.num_weeks

    # ------------------------------------------------------------------
    # Daily averages
    # ------------------------------------------------------------------

    def get_average_daily_spending(self):
        """Return total spending divided by ``num_days``."""
        return self.get_total_spending() / self.num_days

    def get_average_daily_spending_for_category(self, category):
        """Return the category total divided by ``num_days``."""
        return self.get_spending_per_category()[category] / self.num_days

    def get_average_daily_spending_for_retailer(self, retailer):
        """Return the retailer total divided by ``num_days``."""
        return self.get_total_spending_for_retailer(retailer) / self.num_days

    def get_average_daily_spending_for_cardholder(self, cardholder):
        """Return the cardholder total divided by ``num_days``."""
        totals = self.get_total_spending_per_cardholder()
        return totals[cardholder] / self.num_days

    def get_average_daily_spending_for_cardholder_for_category(
            self, cardholder, category):
        """Return the cardholder's category total divided by ``num_days``."""
        totals = self.get_spending_per_category_per_cardholder()
        return totals[cardholder][category] / self.num_days

    # ------------------------------------------------------------------
    # Uncached computations behind the getters above
    # ------------------------------------------------------------------

    def __analyze_per_category(self):
        """Build ``{category: parser.sum_total_category(category)}``."""
        # TODO treat income and Payment/Credit differently
        return {
            category: self.parser.sum_total_category(category)
            for category in self.categories
        }

    def __analyze_per_cardholder(self):
        """Build ``{cardholder: {category: total}}`` from the parser."""
        return {
            cardholder: {
                category:
                self.parser.sum_total_category_per_cardholder(
                    category, cardholder)
                for category in self.categories
            }
            for cardholder in self.cardholders
        }

    def __analyze_percent_per_category(self):
        """Build ``{category: category total / total spending}``."""
        # NOTE(review): a total of zero is not guarded against here.
        total = self.get_total_spending()
        per_category = self.get_spending_per_category()
        return {
            category: per_category[category] / total
            for category in self.categories
        }

    def __analyze_percent_per_category_per_cardholder(self):
        """Build ``{cardholder: {category: total / cardholder total}}``."""
        # NOTE(review): a cardholder total of zero is not guarded against.
        per_cardholder = self.get_spending_per_category_per_cardholder()
        cardholder_totals = self.get_total_spending_per_cardholder()
        return {
            cardholder: {
                category:
                per_cardholder[cardholder][category] /
                cardholder_totals[cardholder]
                for category in self.categories
            }
            for cardholder in self.cardholders
        }
def categorize(self, categorize_file):
    """Fill ``self.df["Category"]`` using ``categorize_file`` as reference.

    ``categorize_file`` is loaded with ``CapitalOneParser`` and its dataframe
    is given to a ``MerchantParser`` built with ``confidence_level=.87``.
    Each value of ``self.df["Description"]`` is then passed to that parser's
    ``get_category_for_retailer`` and the results become the "Category"
    column.

    Args:
        categorize_file: File to load as the category reference.
    """
    reference = CapitalOneParser(categorize_file)
    category_lookup = MerchantParser(reference.df, confidence_level=.87)
    descriptions = self.df["Description"]
    self.df["Category"] = descriptions.apply(
        category_lookup.get_category_for_retailer)