def statistics(self, account_name=None):
    """Compute summary statistics over the ledger entries.

    Args:
      account_name: If given, return only the activity record for that one
        account, or None unless exactly one record matches.
    Returns:
      A dict of statistics, or a single activity record / None when
      account_name is provided.
    """
    if account_name:
        matches = self._activity_by_account(account_name)
        return matches[0] if len(matches) == 1 else None

    # Count the number of entries for each directive type name.
    grouped = misc_utils.groupby(lambda entry: type(entry).__name__,
                                 self.entries)
    num_entries = {typename: len(group)
                   for typename, group in grouped.items()}

    # Flatten out all the postings of all the transactions.
    postings = [posting
                for entry in self.entries
                if isinstance(entry, Transaction)
                for posting in entry.postings]

    # Count the number of postings for each account.
    by_account = misc_utils.groupby(lambda posting: posting.account,
                                    postings)
    num_postings = {account: len(group)
                    for account, group in by_account.items()}

    return {
        'entries_by_type': num_entries,
        'entries_by_type_total': sum(num_entries.values()),
        'postings_by_account': num_postings,
        'postings_by_account_total': sum(num_postings.values()),
        'activity_by_account': self._activity_by_account(),
    }
def statistics(self, account_name=None):
    """Compute statistics on the loaded entries.

    Args:
      account_name: If provided, restrict the result to the activity record
        for that account; returns None unless exactly one record matches.
    Returns:
      A dict of statistics, or a single activity record / None when
      account_name is provided.
    """
    if account_name:
        activity_by_account = self._activity_by_account(account_name)
        if len(activity_by_account) == 1:
            return activity_by_account[0]
        else:
            return None
    grouped_entries = misc_utils.groupby(type, self.entries)
    # Use 'etype' for the comprehension variable so as not to shadow the
    # builtin 'type' (the original used 'type' here).
    entries_by_type = {etype.__name__: len(entries)
                       for etype, entries in grouped_entries.items()}
    all_postings = [posting
                    for entry in self.entries
                    if isinstance(entry, Transaction)
                    for posting in entry.postings]
    grouped_postings = misc_utils.groupby(lambda posting: posting.account,
                                          all_postings)
    postings_by_account = {account: len(postings)
                           for account, postings in grouped_postings.items()}
    return {'entries_by_type': entries_by_type,
            'postings_by_account': postings_by_account,
            'activity_by_account': self._activity_by_account()}
def statistics(self, account_name=None):
    """Compute statistics over the entries.

    If account_name is given, return the single matching activity record
    (or None); otherwise return a dict of aggregate counts.
    """
    if account_name:
        records = self._activity_by_account(account_name)
        if len(records) == 1:
            return records[0]
        return None

    # Number of entries per directive type name.
    typed_groups = misc_utils.groupby(
        lambda entry: type(entry).__name__, self.entries)
    entry_counts = {}
    for typename, group in typed_groups.items():
        entry_counts[typename] = len(group)

    # All postings of all transactions, flattened.
    flat_postings = []
    for entry in self.entries:
        if isinstance(entry, Transaction):
            flat_postings.extend(entry.postings)

    # Number of postings per account.
    account_groups = misc_utils.groupby(
        lambda posting: posting.account, flat_postings)
    posting_counts = {}
    for acc, group in account_groups.items():
        posting_counts[acc] = len(group)

    return {
        'entries_by_type': entry_counts,
        'entries_by_type_total': sum(entry_counts.values()),
        'postings_by_account': posting_counts,
        'postings_by_account_total': sum(posting_counts.values()),
        'activity_by_account': self._activity_by_account(),
    }
def statistics(self, account_name=None):
    """Compute counts of entries by type and postings by account.

    Args:
      account_name: If given, return only that account's activity record,
        or None unless exactly one record matches.
    Returns:
      A dict of statistics, or a single activity record / None when
      account_name is provided.
    """
    if account_name:
        activity_by_account = self._activity_by_account(account_name)
        if len(activity_by_account) == 1:
            return activity_by_account[0]
        else:
            return None
    grouped_entries = misc_utils.groupby(type, self.entries)
    # Name the key 'entry_type' instead of 'type' to avoid shadowing the
    # builtin 'type' inside the comprehension.
    entries_by_type = {
        entry_type.__name__: len(entries)
        for entry_type, entries in grouped_entries.items()
    }
    all_postings = [
        posting for entry in self.entries
        if isinstance(entry, Transaction)
        for posting in entry.postings
    ]
    grouped_postings = misc_utils.groupby(lambda posting: posting.account,
                                          all_postings)
    postings_by_account = {
        account: len(postings)
        for account, postings in grouped_postings.items()
    }
    return {
        'entries_by_type': entries_by_type,
        'postings_by_account': postings_by_account,
        'activity_by_account': self._activity_by_account()
    }
def aggregate_holdings_by(holdings, aggregation_key):
    """Aggregate holdings by the given key.

    They are always grouped by cost currency.
    """
    def _cost_currency(pos):
        # Positions without a cost fall back on their units currency.
        return pos.cost.currency if pos.cost else pos.units.currency

    if aggregation_key == 'currency':
        def _key(pos):
            return (pos.units.currency, _cost_currency(pos))
    elif aggregation_key == 'account':
        def _key(pos):
            return (pos.account, _cost_currency(pos))
    else:
        _key = _cost_currency

    groups = misc_utils.groupby(_key, holdings)
    aggregated = [aggregate_holdings_list(group)
                  for group in groups.values()]
    return sorted(aggregated,
                  key=operator.attrgetter('account', 'units.currency'))
def postings_by_account(self):
    """Return a mapping of account name to its number of postings."""
    # Gather the postings of all the transactions.
    postings = []
    for entry in self.entries:
        if isinstance(entry, Transaction):
            postings.extend(entry.postings)
    # Count the postings in each per-account group.
    counts = {}
    grouped = misc_utils.groupby(lambda posting: posting.account, postings)
    for account, group in grouped.items():
        counts[account] = len(group)
    return counts
def postings_by_account(self):
    """Map each account name to the number of postings booked against it."""
    transactions = (entry for entry in self.entries
                    if isinstance(entry, Transaction))
    flat = [posting
            for txn in transactions
            for posting in txn.postings]
    grouped = misc_utils.groupby(lambda posting: posting.account, flat)
    return {acc: len(group) for acc, group in grouped.items()}
def generate_table(self, entries, _, __):
    """Build a table of the number of directives per entry type."""
    grouped = misc_utils.groupby(lambda entry: type(entry).__name__, entries)
    counts = {typename: len(group) for typename, group in grouped.items()}
    # Most frequent entry types first.
    ordered = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    rows = [(typename, str(count)) for typename, count in ordered]
    rows.append(('~Total~', str(len(entries))))
    return table.create_table(rows,
                              [(0, 'Type'),
                               (1, 'Num Entries', '{:>}'.format)])
def generate_table(self, entries, _, __):
    """Build a table of the number of postings per account."""
    flat_postings = []
    for entry in entries:
        if isinstance(entry, data.Transaction):
            flat_postings.extend(entry.postings)
    grouped = misc_utils.groupby(lambda posting: posting.account,
                                 flat_postings)
    counts = {account: len(group) for account, group in grouped.items()}
    # Accounts with the most postings first.
    ordered = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    rows = [(account, str(count)) for account, count in ordered]
    rows.append(('~Total~', str(sum(counts.values()))))
    return table.create_table(rows,
                              [(0, 'Account'),
                               (1, 'Num Postings', '{:>}'.format)])
def aggregate_holdings_by(holdings, aggregation_key):
    """Aggregate a list of holdings by the given key.

    They are always grouped by cost currency.
    """
    def _cost_ccy(pos):
        # A position without a cost is keyed on its units currency instead.
        return pos.cost.currency if pos.cost else pos.units.currency

    if aggregation_key == 'currency':
        def _key(pos):
            return (pos.units.currency, _cost_ccy(pos))
    elif aggregation_key == 'account':
        def _key(pos):
            return (pos.account, _cost_ccy(pos))
    else:
        _key = _cost_ccy

    grouped = misc_utils.groupby(_key, holdings)
    aggregated = list(map(aggregate_holdings_list, grouped.values()))
    aggregated.sort(key=operator.attrgetter('account', 'units.currency'))
    return aggregated
def main():
    """Extract and report capital sales from a Beancount ledger.

    Parses the command-line arguments, loads the ledger, expands each of the
    sale legs against running inventory balances, and renders one of three
    reports ('detail', 'aggregate' or 'summary') to the console and
    optionally to a CSV file.
    """
    logging.basicConfig(level=logging.INFO, format='%(levelname)-8s: %(message)s')
    parser = argparse.ArgumentParser(description=__doc__.strip())
    parser.add_argument('report', choices=['detail', 'aggregate', 'summary'],
                        help='Type of report')
    parser.add_argument('filename', help='Beancount input file')
    parser.add_argument('account', help='Account name')
    parser.add_argument('--start', type=date_utils.parse_date_liberally,
                        help="Start date")
    # Fixed a typo in the user-visible help string ("star'ts" -> "start's").
    parser.add_argument('--end', type=date_utils.parse_date_liberally,
                        help="End date; if not set, at the end of start's year")
    parser.add_argument('-o', '--output', action='store',
                        help="Output filename for the CSV file")
    args = parser.parse_args()

    calculate_commission = False

    # Setup date interval.
    if args.start is None:
        args.start = datetime.date(datetime.date.today().year, 1, 1)
    if args.end is None:
        args.end = datetime.date(args.start.year + 1, 1, 1)

    entries, errors, options_map = loader.load_file(args.filename)

    # Expand each of the sales legs.
    balances = collections.defaultdict(inventory.Inventory)
    sales = []
    for txn in data.filter_txns(entries):
        # If we got to the end of the period, bail out.
        if txn.date >= args.end:
            break

        # Accumulate the balances before the start date.
        if txn.date < args.start:
            for posting in txn.postings:
                if re.match(args.account, posting.account):
                    balance = balances[posting.account]
                    balance.add_position(posting)
            continue

        # Fallthrough: we're in the period. Process the matching postings.

        # Find reducing postings (i.e., for each lot).
        txn_sales = []
        for posting in txn.postings:
            if re.match(args.account, posting.account):
                balance = balances[posting.account]
                reduced_position, booking = balance.add_position(posting)
                # Set the cost on the posting from the reduced position.
                # FIXME: Eventually that'll happen automatically during the full
                # booking stage.
                if booking == inventory.Booking.REDUCED:
                    posting = posting._replace(cost=reduced_position.cost)

                # If the postings don't have a reference number, ignore them.
                if 'ref' not in txn.meta:
                    continue

                # Only held-at-cost reductions (negative units) are sales.
                if (posting.cost and
                        posting.units.number < ZERO):
                    if not posting.price:
                        logging.error("Missing price on %s", posting)
                    txn_sales.append(data.TxnPosting(txn, posting))

        if txn_sales and calculate_commission:
            # Find total commission.
            for posting in txn.postings:
                if re.search('Commission', posting.account):
                    commission = posting.units.number
                    break
            else:
                commission = ZERO

            # Compute total number of units.
            tot_units = sum(sale.posting.units.number
                            for sale, _ in txn_sales)

            # Assign a proportion of the commission to each of the sales by
            # inserting it into its posting metadata. This will be processed below.
            for sale, _ in txn_sales:
                fraction = sale.posting.units.number / tot_units
                sale.posting.meta['commission'] = fraction * commission

        sales.extend(txn_sales)

    # Convert into a table of data, full detail of every single lot.
    Q = D('0.01')
    lots = []
    total_loss = collections.defaultdict(D)
    total_gain = collections.defaultdict(D)
    total_adj = collections.defaultdict(D)

    # If no mssb number has been assigned explicitly, assign a random one. I
    # need to figure out how to find those numbers again.
    auto_mssb_number = itertools.count(start=1000000 + 1)

    for sale in sales:
        try:
            sale_no = sale.txn.meta['mssb']
        except KeyError:
            sale_no = next(auto_mssb_number)
        ref = sale.txn.meta['ref']

        units = sale.posting.units
        # Units are negative on a sale, hence the sign flip for totals.
        totcost = (-units.number * sale.posting.cost.number).quantize(Q)
        totprice = (-units.number * sale.posting.price.number).quantize(Q)

        commission_meta = sale.posting.meta.get('commission', None)
        if commission_meta is None:
            commission = ZERO
        else:
            if calculate_commission:
                commission = commission_meta
            else:
                # Fetch the commission that was inserted by the commissions plugin.
                commission = commission_meta[0].units.number
        commission = commission.quantize(Q)

        pnl = (totprice - totcost - commission).quantize(Q)
        is_wash = sale.posting.meta.get('wash', False)
        if totprice > totcost:
            total_gain[units.currency] += pnl
        else:
            total_loss[units.currency] += pnl
        if is_wash:
            total_adj[units.currency] += pnl
            code = 'W'
            adj = -pnl
        else:
            code = ''
            adj = ''

        days_held = (sale.txn.date - sale.posting.cost.date).days
        term = 'LONG' if days_held >= 365 else 'SHORT'
        lot = LotSale(sale_no,
                      ref,
                      sale.posting.cost.date,
                      sale.txn.date,
                      days_held,
                      term,
                      units.currency,
                      -units.number.quantize(Q),
                      sale.posting.cost.number.quantize(Q),
                      sale.posting.price.number.quantize(Q),
                      totcost,
                      totprice,
                      commission,
                      totprice - commission,
                      pnl,
                      code,
                      adj)
        lots.append(lot)
    tab_detail = table.create_table(lots, fieldspec)

    # Aggregate by transaction in order to be able to cross-check against the
    # 1099 forms.
    agglots = [aggregate_sales(lots)
               for _, lots in misc_utils.groupby(
                   lambda lot: (lot.no, lot.ref), lots).items()]
    tab_agg = table.create_table(
        sorted(agglots, key=lambda lot: (lot.ref, lot.no)),
        fieldspec)

    # Write out a summary of P/L.
    summary_fields = list(enumerate(['Currency', 'Gain', 'Loss', 'Net', 'Adj/Wash']))
    summary = []
    gain = ZERO
    loss = ZERO
    adj = ZERO
    for currency in sorted(total_adj.keys()):
        gain += total_gain[currency]
        loss += total_loss[currency]
        adj += total_adj[currency]
        summary.append((currency,
                        total_gain[currency],
                        total_loss[currency],
                        total_gain[currency] + total_loss[currency],
                        total_adj[currency]))
    summary.append(('*', gain, loss, gain + loss, adj))
    tab_summary = table.create_table(summary, summary_fields)

    if args.report == 'detail':
        # Render to the console.
        print('Detail of all lots')
        print('=' * 48)
        table.render_table(tab_detail, sys.stdout, 'txt')
        print()
        if args.output:
            with open(args.output, 'w') as file:
                table.render_table(tab_detail, file, 'csv')
    elif args.report == 'aggregate':
        print('Aggregated by trade & Reference Number (to Match 1099/Form8459)')
        print('=' * 48)
        table.render_table(tab_agg, sys.stdout, 'txt')
        print()
        if args.output:
            with open(args.output, 'w') as file:
                table.render_table(tab_agg, file, 'csv')
    elif args.report == 'summary':
        print('Summary')
        print('=' * 48)
        table.render_table(tab_summary, sys.stdout, 'txt')
        print()
        if args.output:
            with open(args.output, 'w') as file:
                table.render_table(tab_summary, file, 'csv')
def test_groupby(self):
    """Check that groupby() partitions pairs by their first element."""
    pairs = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
    result = misc_utils.groupby(lambda pair: pair[0], pairs)
    self.assertEqual({'a', 'b', 'c', 'd'}, result.keys())
    self.assertEqual([[('a', 1)], [('b', 2)], [('c', 3)], [('d', 4)]],
                     sorted(result.values()))
def pad(entries, options_map):
    """Insert transaction entries to fulfill a subsequent balance check.

    Synthesize and insert Transaction entries right after Pad entries in order
    to fulfill checks in the padded accounts. Returns a new list of entries.
    Note that this doesn't pad across parent-child relationships, it is a very
    simple kind of pad. (I have found this to be sufficient in practice, and
    simpler to implement and understand.)

    Furthermore, this pads for a single currency only, that is, balance checks
    are specified only for one currency at a time, and pads will only be
    inserted for those currencies.

    Args:
      entries: A list of directives.
      options_map: A parser options dict.
    Returns:
      A new list of directives, with Pad entries inserted, and a list of new
      errors produced.
    """
    pad_errors = []

    # Find all the pad entries and group them by account.
    pads = list(misc_utils.filter_type(entries, data.Pad))
    pad_dict = misc_utils.groupby(lambda x: x.account, pads)

    # Partially realize the postings, so we can iterate them by account.
    by_account = realization.postings_by_account(entries)

    # A dict of pad id -> list of entries to be inserted after that pad.
    new_entries = {id(pad): [] for pad in pads}

    # Process each account that has a padding group.
    for account_, pad_list in sorted(pad_dict.items()):

        # Last encountered / currently active pad entry.
        active_pad = None

        # Gather all the postings for the account and its children.
        postings = []
        is_child = account.parent_matcher(account_)
        for item_account, item_postings in by_account.items():
            if is_child(item_account):
                postings.extend(item_postings)
        # Sort into ledger order so pads, transactions and balance checks
        # are visited in chronological sequence.
        postings.sort(key=data.posting_sortkey)

        # A set of currencies already padded so far in this account.
        padded_lots = set()

        # Running balance for the account, accumulated in ledger order.
        pad_balance = inventory.Inventory()
        for entry in postings:
            assert not isinstance(entry, data.Posting)
            if isinstance(entry, data.TxnPosting):
                # This is a transaction; update the running balance for this
                # account.
                pad_balance.add_position(entry.posting.position)
            elif isinstance(entry, data.Pad):
                if entry.account == account_:
                    # Mark this newly encountered pad as active and allow all
                    # lots to be padded heretofore.
                    active_pad = entry
                    padded_lots = set()
            elif isinstance(entry, data.Balance):
                check_amount = entry.amount
                # Compare the current balance amount to the expected one from
                # the check entry. IMPORTANT: You need to understand that this
                # does not check a single position, but rather checks that the
                # total amount for a particular currency (which itself is
                # distinct from the cost).
                balance_amount = pad_balance.get_units(check_amount.currency)
                diff_amount = amount.amount_sub(balance_amount, check_amount)

                # Use the specified tolerance or automatically infer it.
                tolerance = balance.get_tolerance(entry, options_map)
                if abs(diff_amount.number) > tolerance:
                    # The check fails; we need to pad.

                    # Pad only if pad entry is active and we haven't already
                    # padded that lot since it was last encountered.
                    if active_pad and (check_amount.currency not in padded_lots):

                        # Note: we decide that it's an error to try to pad
                        # positions at cost; we check here that all the existing
                        # positions with that currency have no cost.
                        positions = [pos
                                     for pos in pad_balance.get_positions()
                                     if pos.lot.currency == check_amount.currency]
                        for position_ in positions:
                            if position_.lot.cost is not None:
                                pad_errors.append(
                                    PadError(entry.meta,
                                             ("Attempt to pad an entry with cost for "
                                              "balance: {}".format(pad_balance)),
                                             active_pad))

                        # Thus our padding lot is without cost by default.
                        lot = position.Lot(check_amount.currency, None, None)
                        diff_position = position.Position(
                            lot, check_amount.number - balance_amount.number)

                        # Synthesize a new transaction entry for the difference.
                        narration = ('(Padding inserted for Balance of {} for '
                                     'difference {})').format(check_amount,
                                                              diff_position)
                        new_entry = data.Transaction(
                            active_pad.meta.copy(), active_pad.date,
                            flags.FLAG_PADDING, None, narration, None, None, [])

                        # One posting on the padded account, the opposite one
                        # on the pad's source account.
                        new_entry.postings.append(
                            data.Posting(active_pad.account, diff_position,
                                         None, None, None))
                        new_entry.postings.append(
                            data.Posting(active_pad.source_account, -diff_position,
                                         None, None, None))

                        # Save it for later insertion after the active pad.
                        new_entries[id(active_pad)].append(new_entry)

                        # Fixup the running balance.
                        position_, _ = pad_balance.add_position(diff_position)
                        if position_.is_negative_at_cost():
                            raise ValueError(
                                "Position held at cost goes negative: {}".format(position_))

                # Mark this lot as padded. Further checks should not pad this lot.
                padded_lots.add(check_amount.currency)

    # Insert the newly created entries right after the pad entries that created them.
    padded_entries = []
    for entry in entries:
        padded_entries.append(entry)
        if isinstance(entry, data.Pad):
            entry_list = new_entries[id(entry)]
            if entry_list:
                padded_entries.extend(entry_list)
            else:
                # Generate errors on unused pad entries.
                pad_errors.append(
                    PadError(entry.meta, "Unused Pad entry", entry))

    return padded_entries, pad_errors
def aggregate_sales(lots):
    """Aggregate the lot sales, grouped by (reference, term) pairs."""
    grouped = misc_utils.groupby(lambda lot: (lot.ref, lot.term), lots)
    return [aggregate_lot_sales(group) for group in grouped.values()]