Esempio n. 1
0
 def __init__(self, fname, category=None, start_date=None, end_date=None):
     """Load the transactions in ``fname`` and set up the analysis state.

     Args:
         fname: Passed to ``CapitalOneParser`` as the file to read.
         category: Forwarded to ``MerchantParser`` as ``category``.
         start_date: Forwarded to ``CapitalOneParser`` as ``start_date``.
         end_date: Forwarded to ``CapitalOneParser`` as ``end_date``.
     """
     parser = CapitalOneParser(fname,
                               start_date=start_date,
                               end_date=end_date)
     self.parser = parser
     self.merchant_parser = MerchantParser(parser.get_dataframe(),
                                           category=category)
     self.categories = parser.get_categories()
     self.cardholders = parser.get_cardholders()

     # Both bounds are converted with ``to_pydatetime()`` before being used.
     first_day = parser.get_min_date().to_pydatetime()
     last_day = parser.get_max_date().to_pydatetime()
     self.min_date = first_day
     self.max_date = last_day

     # Every period count is floored at 1.
     date_util = TransactionDateUtil
     self.num_months = max(
         date_util.get_num_months_between_dates(first_day, last_day), 1)
     self.num_weeks = max(
         date_util.get_num_weeks_between_dates(first_day, last_day), 1)
     self.num_days = max(
         date_util.get_num_days_between_dates(first_day, last_day), 1)

     # Aggregates start out unset.
     self.spending_per_category = None
     self.spending_per_category_per_cardholder = None
     self.percent_per_category_per_cardholder = None
     self.percentage_per_category = None
     self.total_spent = None
     self.total_spent_per_cardholder = None
Esempio n. 2
0
 def __init__(self, fname, start_date=None, end_date=None):
     """Parse ``fname`` and create the helpers used to clean merchant names.

     Args:
         fname: Passed to ``CapitalOneParser`` as the file to read.
         start_date: Forwarded to ``CapitalOneParser`` as ``start_date``.
         end_date: Forwarded to ``CapitalOneParser`` as ``end_date``.
     """
     parser = CapitalOneParser(fname,
                               start_date=start_date,
                               end_date=end_date)
     frame = parser.get_dataframe()

     self.parser = parser
     self.df = frame
     # The merchant parser is built from the same dataframe kept in self.df.
     self.merchant_parser = MerchantParser(frame)
     self.merchant_cleaner = MerchantStringCleaner()
     self.merchant_matcher = MerchantDescriptionMatcher()
     # NOTE(review): not read anywhere in this method -- confirm it is used.
     self.description_column_index = 3
Esempio n. 3
0
 def merge_data(initial_fname, csvs, output):
     """Fold every file in ``csvs`` into ``initial_fname`` and write ``output``.

     Args:
         initial_fname: File parsed first; the other files are added to it.
         csvs: Iterable of file names, each parsed with ``CapitalOneParser``.
         output: Passed to the combined parser's ``write``.
     """
     combined = CapitalOneParser(initial_fname)
     for fname in csvs:
         addition = CapitalOneParser(fname)
         logging.debug(f"Merging data from {fname} into {initial_fname}")
         extra_rows = addition.get_dataframe()
         combined.add_data(extra_rows)
     combined.write(output)
Esempio n. 4
0
class CapitalOneCleaner:
    """Rewrite the description column of a transaction file with matched names."""

    def __init__(self, fname, start_date=None, end_date=None):
        """Parse ``fname`` and create the cleaning and matching helpers.

        Args:
            fname: Passed to ``CapitalOneParser`` as the file to read.
            start_date: Forwarded to ``CapitalOneParser`` as ``start_date``.
            end_date: Forwarded to ``CapitalOneParser`` as ``end_date``.
        """
        parser = CapitalOneParser(fname,
                                  start_date=start_date,
                                  end_date=end_date)
        frame = parser.get_dataframe()

        self.parser = parser
        self.df = frame
        self.merchant_parser = MerchantParser(frame)
        self.merchant_cleaner = MerchantStringCleaner()
        self.merchant_matcher = MerchantDescriptionMatcher()
        # NOTE(review): not read anywhere in this class -- confirm it is used.
        self.description_column_index = 3

    def clean(self, outfile):
        """Clean and match every description, then save the frame as CSV.

        Each value in the ``DESCRIPTION`` column is passed through
        ``clean_merchant`` and then replaced by the result of
        ``find_closest_match`` against the retailers reported by the merchant
        parser.  ``self.df`` is modified in place.

        Args:
            outfile: Destination handed to ``DataFrame.to_csv``.
        """
        # Retailers are collected before the column is overwritten.
        known_retailers = self.merchant_parser.get_retailers()
        matcher = self.merchant_matcher

        def to_known_retailer(description):
            return matcher.find_closest_match(description, known_retailers)

        tidied = self.df[DESCRIPTION].apply(
            self.merchant_cleaner.clean_merchant)
        self.df[DESCRIPTION] = tidied.apply(to_known_retailer)
        self.df.to_csv(outfile, index=False)
Esempio n. 5
0
 def __init__(self, filename, start, finish):
     """Parse ``filename`` and record the categories the parser reports.

     Args:
         filename: File to parse; also stored on ``self.filename``.
         start: Forwarded to ``CapitalOneParser`` as ``start_date``.
         finish: Forwarded to ``CapitalOneParser`` as ``end_date``.
     """
     date_window = {"start_date": start, "end_date": finish}
     self.filename = filename
     self.parser = CapitalOneParser(filename, **date_window)
     self.categories = self.parser.get_all_categories()
Esempio n. 6
0
class Categorizer:
    """Interactive prompts for moving retailers into a different category."""

    def __init__(self, filename, start, finish):
        """Parse ``filename`` and record the categories the parser reports.

        Args:
            filename: File to parse; also the name offered when saving.
            start: Forwarded to ``CapitalOneParser`` as ``start_date``.
            finish: Forwarded to ``CapitalOneParser`` as ``end_date``.
        """
        self.filename = filename
        self.parser = CapitalOneParser(filename,
                                       start_date=start,
                                       end_date=finish)
        self.categories = self.parser.get_all_categories()

    def categorize(self, category):
        """Recategorize retailers from ``category`` until the user stops.

        Each round refreshes the category list, asks for a retailer and then
        for its new category.  The loop ends when no retailer is returned or
        when the user declines to continue; the parser is written afterwards
        in both cases.
        """
        while True:
            self.__update_categories()
            chosen = self.__prompt_retailer(category)
            if not chosen:
                OutputHelper.echo_no_transactions_in_category(category)
                break
            self.categorize_retailer(chosen, write=False)
            if not self.__prompt_continue():
                break

        target = OutputHelper.confirm_overwrite(self.filename)
        self.parser.write(target)

    def categorize_retailer(self, retailer, write=False):
        """Ask for a category and apply it to ``retailer``.

        Args:
            retailer: Retailer passed to ``update_categories_for_retailer``.
            write: When true, the parser is written out after the update.
        """
        chosen_category = self.__prompt_categorize()
        self.parser.update_categories_for_retailer(retailer, chosen_category)
        if not write:
            return
        target = OutputHelper.confirm_overwrite(self.filename)
        self.parser.write(target)

    def __prompt_categorize(self):
        """Return the ``"category"`` answer from the category prompt."""
        use_existing = {
            'type': 'confirm',
            'name': 'existing',
            'message': 'Would you like to add this to an existing category?',
            'default': True
        }
        # Exactly one of the next two questions is enabled by its ``when``
        # callback, depending on the ``existing`` answer.
        pick_existing = {
            'type': 'list',
            'name': 'category',
            'message': 'Select an existing category.',
            'choices': self.categories,
            'when': lambda answers: answers['existing']
        }
        enter_new = {
            'type': 'input',
            'name': 'category',
            'message': 'Enter a new category:',
            'when': lambda answers: not answers['existing']
        }
        answers = prompt([use_existing, pick_existing, enter_new])
        return answers["category"]

    def __prompt_retailer(self, category):
        """Return the retailer picked from ``category``, or None if it is empty."""
        candidates = self.parser.get_unique_transactions_for_category(
            category)
        if not len(candidates):
            return None
        pick_retailer = {
            'type': 'list',
            'name': 'retailer',
            'message': 'Select a retailer to recategorize',
            'choices': candidates
        }
        return prompt([pick_retailer])["retailer"]

    def __prompt_continue(self):
        """Return the answer to the "Continue categorizing?" confirmation."""
        keep_going = {
            'type': 'confirm',
            'name': 'continue',
            'message': "Continue categorizing?",
            'default': False
        }
        return prompt([keep_going])["continue"]

    def __update_categories(self):
        """Reload ``self.categories`` from the parser."""
        self.categories = self.parser.get_all_categories()
Esempio n. 7
0
class CapitalOneAnalyzer:
    """Compute spending totals, averages and shares for a transaction file.

    Aggregates are computed on first request and cached on the instance.  A
    cache holding ``None`` means "not computed yet"; any other value,
    including ``0`` or an empty dict, is a valid cached result.
    """

    def __init__(self, fname, category=None, start_date=None, end_date=None):
        """Load the transactions in ``fname`` and set up the analysis state.

        Args:
            fname: Passed to ``CapitalOneParser`` as the file to read.
            category: Forwarded to ``MerchantParser`` as ``category``.
            start_date: Forwarded to ``CapitalOneParser`` as ``start_date``.
            end_date: Forwarded to ``CapitalOneParser`` as ``end_date``.
        """
        self.parser = CapitalOneParser(fname,
                                       start_date=start_date,
                                       end_date=end_date)
        self.merchant_parser = MerchantParser(self.parser.get_dataframe(),
                                              category=category)
        self.categories = self.parser.get_categories()
        self.cardholders = self.parser.get_cardholders()
        self.min_date = self.parser.get_min_date().to_pydatetime()
        self.max_date = self.parser.get_max_date().to_pydatetime()
        # Period counts are floored at 1 because the average_* methods
        # divide by them.
        self.num_months = max(
            TransactionDateUtil.get_num_months_between_dates(
                self.min_date, self.max_date), 1)
        self.num_weeks = max(
            TransactionDateUtil.get_num_weeks_between_dates(
                self.min_date, self.max_date), 1)
        self.num_days = max(
            TransactionDateUtil.get_num_days_between_dates(
                self.min_date, self.max_date), 1)
        # Lazily filled caches; None marks "not computed yet".
        self.spending_per_category = None
        self.spending_per_category_per_cardholder = None
        self.percent_per_category_per_cardholder = None
        self.percentage_per_category = None
        self.total_spent = None
        self.total_spent_per_cardholder = None

    def get_spending_per_category(self):
        """Return a cached ``{category: total}`` dict."""
        if self.spending_per_category is None:
            self.spending_per_category = self.__analyze_per_category()
        return self.spending_per_category

    def get_spending_per_category_per_cardholder(self):
        """Return a cached ``{cardholder: {category: total}}`` dict."""
        if self.spending_per_category_per_cardholder is None:
            self.spending_per_category_per_cardholder = (
                self.__analyze_per_cardholder())
        return self.spending_per_category_per_cardholder

    def get_percentage_per_category(self):
        """Return a cached ``{category: total / total spending}`` dict."""
        if self.percentage_per_category is None:
            self.percentage_per_category = (
                self.__analyze_percent_per_category())
        return self.percentage_per_category

    def get_percent_per_category_per_cardholder(self):
        """Return a cached ``{cardholder: {category: share}}`` dict."""
        if self.percent_per_category_per_cardholder is None:
            self.percent_per_category_per_cardholder = (
                self.__analyze_percent_per_category_per_cardholder())
        return self.percent_per_category_per_cardholder

    def get_total_income(self):
        """Return the parser's ``sum_total_income()``."""
        return self.parser.sum_total_income()

    def get_total_income_per_source(self):
        """Return ``{source: abs(total)}`` for each source the parser lists."""
        return {
            source: abs(self.parser.sum_total_income_for_source(source))
            for source in self.parser.get_income_sources()
        }

    def get_total_payment_credit(self):
        """Return the parser's ``sum_total_payment_credit()``."""
        return self.parser.sum_total_payment_credit()

    def get_total_spending(self):
        """Return the cached total spending, computing it on first use."""
        # ``is None`` rather than truthiness: a total of 0 is a valid cached
        # value and must not trigger a recomputation on every call.
        if self.total_spent is None:
            self.total_spent = self.parser.sum_total_spending()
            logging.debug(f"Raw total spent {self.total_spent}")
        return self.total_spent

    def get_total_spending_per_cardholder(self):
        """Return a cached ``{cardholder: total spending}`` dict."""
        if self.total_spent_per_cardholder is None:
            # Built in one expression so a failure part-way through never
            # leaves a partially filled dict behind as the cached value.
            self.total_spent_per_cardholder = {
                cardholder:
                self.parser.sum_total_spending_per_cardholder(cardholder)
                for cardholder in self.cardholders
            }
        return self.total_spent_per_cardholder

    def get_total_spending_for_retailer(self, retailer):
        """Return the merchant parser's sum for ``retailer`` (not cached)."""
        # TODO: also keep this in a cache when there is an interactive mode
        return self.merchant_parser.sum_for_retailer(retailer)

    def get_total_spending_per_retailer(self, order_by):
        """Return ``{retailer: total}`` sorted in descending order.

        Args:
            order_by: ``"number_of_transactions"`` orders by the merchant
                parser's transaction count per retailer; any other value
                orders by the summed amount.
        """
        # Takes forever TODO: add a progress bar with click
        merchants = self.merchant_parser
        totals = {
            retailer: merchants.sum_for_retailer(retailer)
            for retailer in merchants.get_retailers()
        }

        def by_transaction_count(item):
            return merchants.count_for_retailer(item[0])

        def by_amount(item):
            return item[1]

        sort_key = (by_transaction_count
                    if order_by == "number_of_transactions" else by_amount)
        # dicts preserve insertion order, so the result stays sorted.
        return dict(sorted(totals.items(), key=sort_key, reverse=True))

    def get_average_and_count_for_retailer(self, retailer):
        """Return ``(average amount, transaction count)`` for ``retailer``.

        Returns ``(0, 0)`` when the count is not positive.
        """
        count = self.merchant_parser.count_for_retailer(retailer)
        if count <= 0:
            # Nothing to average, so the sum is not requested at all.
            return 0, 0
        return self.get_total_spending_for_retailer(retailer) / count, count

    def get_average_monthly_spending(self):
        """Return total spending divided by ``num_months``."""
        return self.get_total_spending() / self.num_months

    def get_average_monthly_spending_for_category(self, category):
        """Return the category total divided by ``num_months``.

        Raises:
            KeyError: If ``category`` is not in the per-category totals.
        """
        # TODO: need validation around categories
        return self.get_spending_per_category()[category] / self.num_months

    def get_average_monthly_spending_for_retailer(self, retailer):
        """Return the retailer total divided by ``num_months``."""
        return self.get_total_spending_for_retailer(retailer) / self.num_months

    def get_average_monthly_spending_for_cardholder(self, cardholder):
        """Return the cardholder total divided by ``num_months``."""
        return (self.get_total_spending_per_cardholder()[cardholder] /
                self.num_months)

    def get_average_monthly_spending_for_cardholder_for_category(
            self, cardholder, category):
        """Return the cardholder's category total divided by ``num_months``."""
        per_cardholder = self.get_spending_per_category_per_cardholder()
        return per_cardholder[cardholder][category] / self.num_months

    def get_average_weekly_spending(self):
        """Return total spending divided by ``num_weeks``."""
        return self.get_total_spending() / self.num_weeks

    def get_average_weekly_spending_for_category(self, category):
        """Return the category total divided by ``num_weeks``."""
        return self.get_spending_per_category()[category] / self.num_weeks

    def get_average_weekly_spending_for_retailer(self, retailer):
        """Return the retailer total divided by ``num_weeks``."""
        return self.get_total_spending_for_retailer(retailer) / self.num_weeks

    def get_average_weekly_spending_for_cardholder(self, cardholder):
        """Return the cardholder total divided by ``num_weeks``."""
        return (self.get_total_spending_per_cardholder()[cardholder] /
                self.num_weeks)

    def get_average_weekly_spending_for_cardholder_for_category(
            self, cardholder, category):
        """Return the cardholder's category total divided by ``num_weeks``."""
        per_cardholder = self.get_spending_per_category_per_cardholder()
        return per_cardholder[cardholder][category] / self.num_weeks

    def get_average_daily_spending(self):
        """Return total spending divided by ``num_days``."""
        return self.get_total_spending() / self.num_days

    def get_average_daily_spending_for_category(self, category):
        """Return the category total divided by ``num_days``."""
        return self.get_spending_per_category()[category] / self.num_days

    def get_average_daily_spending_for_retailer(self, retailer):
        """Return the retailer total divided by ``num_days``."""
        return self.get_total_spending_for_retailer(retailer) / self.num_days

    def get_average_daily_spending_for_cardholder(self, cardholder):
        """Return the cardholder total divided by ``num_days``."""
        return (self.get_total_spending_per_cardholder()[cardholder] /
                self.num_days)

    def get_average_daily_spending_for_cardholder_for_category(
            self, cardholder, category):
        """Return the cardholder's category total divided by ``num_days``."""
        per_cardholder = self.get_spending_per_category_per_cardholder()
        return per_cardholder[cardholder][category] / self.num_days

    def __analyze_per_category(self):
        """Build ``{category: parser.sum_total_category(category)}``."""
        # TODO treat income and Payment/Credit differently
        return {
            category: self.parser.sum_total_category(category)
            for category in self.categories
        }

    def __analyze_per_cardholder(self):
        """Build ``{cardholder: {category: total}}`` from the parser."""
        return {
            cardholder: {
                category:
                self.parser.sum_total_category_per_cardholder(
                    category, cardholder)
                for category in self.categories
            }
            for cardholder in self.cardholders
        }

    def __analyze_percent_per_category(self):
        """Build ``{category: category total / total spending}``.

        When total spending is zero every share is reported as ``0.0``
        instead of dividing by zero.
        """
        total = self.get_total_spending()
        per_category = self.get_spending_per_category()
        return {
            category: per_category[category] / total if total else 0.0
            for category in self.categories
        }

    def __analyze_percent_per_category_per_cardholder(self):
        """Build ``{cardholder: {category: share of that cardholder's total}}``.

        A cardholder whose total is zero gets ``0.0`` for every category
        instead of dividing by zero.
        """
        per_cardholder = self.get_spending_per_category_per_cardholder()
        # Fetched once; the per-cardholder totals do not change in the loop.
        totals = self.get_total_spending_per_cardholder()
        shares = dict()
        for cardholder in self.cardholders:
            total = totals[cardholder]
            shares[cardholder] = {
                category:
                per_cardholder[cardholder][category] / total if total else 0.0
                for category in self.categories
            }
        return shares
Esempio n. 8
0
 def categorize(self, categorize_file):
     """Fill the "Category" column using retailers parsed from another file.

     ``categorize_file`` is parsed with ``CapitalOneParser`` and its
     dataframe is used to build a ``MerchantParser`` with a confidence
     level of 0.87.  Each value in the "Description" column of ``self.df``
     is passed to that parser's ``get_category_for_retailer`` and the result
     is stored in the "Category" column.
     """
     reference = CapitalOneParser(categorize_file)
     lookup = MerchantParser(reference.df, confidence_level=0.87)
     descriptions = self.df["Description"]
     self.df["Category"] = descriptions.apply(
         lookup.get_category_for_retailer)