Beispiel #1
0
def parse(filename: str) -> Table:
    """Parse the American Funds download."""

    with open(filename) as infile:
        reader = csv.reader(infile)
        header = next(reader)
        rows = list(reader)
    tbl = Table(header, [str] * len(header), rows)

    # Derive the 'fraction' column from the market values.
    tbl = utils.create_fraction_from_market_value(tbl, 'market_value')

    # Map the raw asset types to the canonical class names.
    asset_classes = {
        'Equity': 'Equity',
        'Fixed Income': 'FixedIncome',
        'Short Term': 'ShortTerm',
    }
    tbl = tbl.map('asset_type', asset_classes.__getitem__)
    tbl = tbl.rename(('asset_type', 'asstype'))

    # Standardize the security name column.
    tbl = tbl.rename(('security_name', 'name'))

    # Keep only the columns the rest of the pipeline expects.
    return tbl.select(['fraction', 'asstype', 'name'])
Beispiel #2
0
def normalize_holdings_table(tbl: Table) -> Table:
    """The assets don't actually sum to 100%, normalize them."""
    total = sum(row.fraction for row in tbl)
    # Warn when the sum deviates noticeably from 1.0, but normalize anyway.
    if not 0.98 < total < 1.02:
        logging.error("Total weight seems invalid: %s", total)
    inverse = 1. / total
    return tbl.map('fraction', lambda fraction: fraction * inverse)
Beispiel #3
0
def parse(filename: str) -> Table:
    """Load the sectioned CSV file and produce a single values table.

    The file contains several titled sections (Equity, Fixed income,
    Short-term reserves); each section is parsed by its dedicated parser
    and the results are concatenated and normalized.

    Note: the return annotation previously claimed Dict[str, Table], but
    the function returns the single concatenated Table.
    """
    with open(filename) as infile:
        reader = csv.reader(infile)
        rows = list(reader)
    sections = csv_utils.csv_split_sections_with_titles(rows)

    # Build one raw string table per section. (Renamed the loop variable so
    # it no longer shadows the outer 'rows'.)
    table_map = {}
    for title, section_rows in sections.items():
        header = section_rows[0]
        table_map[title] = Table(header, [str] * len(header),
                                 section_rows[1:])

    parsers = {
        'Equity': parse_equity,
        'Fixed income': parse_fixed_income,
        'Short-term reserves': parse_shortterm_reserves,
    }
    tables = []
    for title, tbl in table_map.items():
        parser = parsers[title]
        subtbl = parser(tbl)
        subtbl.checkall(VALUES_COLUMNS)
        tables.append(subtbl)

    values_table = table.concat(*tables)
    # Compute fractions, normalize identifiers, and cull the columns.
    values_table = utils.create_fraction_from_market_value(
        values_table, 'market_value')
    values_table = values_table.map(
        'ticker', lambda ticker: ticker if ticker != '-' else '')
    values_table = values_table.rename(('holdings', 'name'))
    values_table = values_table.map('sedol', utils.empty_dashes)
    return values_table.select(
        ['fraction', 'asstype', 'name', 'ticker', 'sedol'])
Beispiel #4
0
def parse(filename: str) -> Table:
    """Parse the iShares holdings file."""
    header, outrows = find_table(filename)
    tbl = Table(header, [str] * len(header), outrows)

    # Derive the 'fraction' column from the market values.
    tbl = utils.create_fraction_from_market_value(tbl, 'market_value')

    # Files with a 'Ticker' column are equity funds; others are treated as
    # fixed income with an empty ticker.
    if 'Ticker' in header:
        tbl = tbl.create('asstype', lambda _: 'Equity')
        tbl = tbl.map('ticker', str.strip)
    else:
        tbl = tbl.create('asstype', lambda _: 'FixedIncome')
        tbl = tbl.create('ticker', lambda _: '')

    # Normalize '-' placeholders in identifier columns, then cull columns.
    for column in ('ticker', 'sedol', 'isin'):
        tbl = tbl.map(column, utils.empty_dashes)
    return tbl.select(
        ['fraction', 'asstype', 'name', 'ticker', 'sedol', 'isin'])
Beispiel #5
0
def get_chain_table(dateMap):
    """Flatten a nested {dateDays: {price: [option, ...]}} map into a Table.

    Rows are emitted in sorted order of the date key, then price, preserving
    the order of each option list. Column names are taken from the keys of
    the first option record; all records are assumed to share the same keys
    (assumption — TODO confirm against the producer of dateMap).
    """
    columns = sorted(first(first(dateMap.values()).values())[0].keys())
    rows = []
    # The previous version also computed dateDays.partition(':') into an
    # unused 'days' variable; that dead code has been removed.
    for _dateDays, priceMap in sorted(dateMap.items()):
        for _price, optionlist in sorted(priceMap.items()):
            for option in optionlist:
                rows.append([option[col] for col in columns])
    return Table(columns, [str] * len(columns), rows)
Beispiel #6
0
def parse_shortterm_reserves(tbl: Table) -> Table:
    """Parse the Short-term reserves table.

    The value column is named differently across file versions; whichever
    of the known names is present is renamed to 'market_value'.
    """
    # for/else replaces the previous index-sentinel + assert, which was a
    # no-op under 'python -O'. The raised type (AssertionError) matches the
    # original failure mode.
    for fname in 'face_amount', 'face_amount_local_currency':
        if fname in tbl.columns:
            break
    else:
        raise AssertionError(
            "No face amount column found in: {}".format(tbl.columns))
    return (tbl.create('asstype', lambda _: 'ShortTerm')
            .rename((fname, 'market_value'))
            .create('ticker', lambda _: '')
            .update('sedol',
                    lambda row: row.sedol if row.sedol != '-' else '')
            .select(VALUES_COLUMNS))
Beispiel #7
0
def parse(filename: str) -> Table:
    """Parse the PowerShares holdings file."""
    with open(filename) as infile:
        reader = csv.reader(infile)
        header = next(reader)
        rows = list(reader)
    tbl = Table(header, [str] * len(header), rows)

    # Derive the 'fraction' column from the market values.
    tbl = utils.create_fraction_from_market_value(tbl, 'marketvalue')

    # All PowerShares holdings are tagged as equity.
    tbl = tbl.create('asstype', lambda _: 'Equity')

    # Normalize the identifier columns.
    tbl = tbl.check(['name'])
    tbl = tbl.rename(('holdingsticker', 'ticker'))
    tbl = tbl.map('ticker', str.strip)
    tbl = tbl.rename(('securitynum', 'cusip'))
    # What about 'securitynum'? What is it?

    return tbl.select(['fraction', 'asstype', 'name', 'ticker', 'cusip'])
Beispiel #8
0
def check_holdings(holdings: Table):
    """Check that the holdings Table has the required columns."""
    actual = set(holdings.columns)
    required = {'asstype', 'fraction'}

    # No columns outside the allowed set.
    other = actual - (required | set(IDCOLUMNS))
    assert not other, "Extra columns found: {}".format(other)

    # Every required column must be present.
    assert required.issubset(actual), (
        "Required columns missing: {}".format(required - actual))

    # At least one identifier column, and only known asset types.
    assert set(IDCOLUMNS) & actual, "No ids columns found: {}".format(actual)
    assert all(cls in ASSTYPES for cls in holdings.values('asstype'))

    # Check that '-' don't appear in identifier columns.
    for column in IDCOLUMNS:
        if column in holdings.columns:
            if '-' in holdings.values(column):
                raise ValueError(
                    "Invalid value '-' in column '{}'".format(column))
Beispiel #9
0
def parse(filename: str) -> Table:
    """Parse the SPDRs holdings file."""
    header, rows = read_table(filename)
    tbl = Table(header, [str] * len(header), rows)

    # The file carries percentage weights directly; renormalize them to
    # fractions summing to 1.
    tbl = tbl.map('weight', float).rename(('weight', 'fraction'))
    total_value = sum(tbl.itervalues('fraction'))
    if not 99 <= total_value <= 101:
        logging.error("Total value is invalid: %s", total_value)
    tbl = tbl.map('fraction', lambda f: f/total_value)

    # All SPDR holdings are tagged as equity.
    tbl = tbl.create('asstype', lambda _: 'Equity')

    # Normalize the identifier columns.
    tbl = tbl.check(['name'])
    tbl = tbl.rename(('identifier', 'ticker'))

    return tbl.select(['fraction', 'asstype', 'name', 'ticker'])
Beispiel #10
0
def main():
    """Collect all the assets and holdings and disaggregate."""
    logging.basicConfig(level=logging.INFO,
                        format='%(levelname)-8s: %(message)s')
    parser = argparse.ArgumentParser(description=__doc__.strip())

    parser.add_argument(
        'portfolio',
        help=('A CSV file which contains the tickers of assets and '
              'number of units'))
    parser.add_argument(
        '--dbdir',
        default=database.DEFAULT_DIR,
        help="Database directory to write all the downloaded files.")
    parser.add_argument(
        '-i',
        '--ignore-missing-issuer',
        action='store_true',
        help="Ignore positions where the issuer implementation is missing")
    parser.add_argument('-o',
                        '--ignore-options',
                        action='store_true',
                        help=("Ignore options positions "
                              "(only works with  Beancount export file)"))
    parser.add_argument('-l',
                        '--ignore-shorts',
                        action='store_true',
                        help="Ignore short positions")

    parser.add_argument(
        '-t',
        '--threshold',
        action='store',
        type=float,
        default=0,
        help="Remove holdings whose value is under a threshold")

    parser.add_argument('-F',
                        '--full-table',
                        action='store',
                        help="Path to write the full table to.")

    parser.add_argument('-A',
                        '--agg-table',
                        action='store',
                        help="Path to write the full table to.")

    parser.add_argument('-D',
                        '--debug-output',
                        action='store',
                        help="Path to debugging output of grouping algorithm.")

    args = parser.parse_args()
    db = database.Database(args.dbdir)

    # Load up the list of assets from the exported Beancount file.
    assets = beansupport.read_portfolio(args.portfolio, args.ignore_options)
    assets.checkall(['ticker', 'account', 'issuer', 'price', 'quantity'])

    assets = assets.order(lambda row: (row.issuer, row.ticker))

    # Fetch baskets for each of those.
    alltables = []
    for row in assets:
        if row.quantity < 0 and args.ignore_shorts:
            continue

        if not row.issuer:
            # No issuer: treat the position as a direct single-equity holding.
            holdings = Table(['fraction', 'asstype', 'ticker'],
                             [str, str, str], [[1.0, 'Equity', row.ticker]])
        else:
            downloader = issuers.get(row.issuer)
            if downloader is None:
                # BUGFIX: was "issuer", an undefined name (NameError).
                message = "Missing issuer: {}".format(row.issuer)
                if args.ignore_missing_issuer:
                    logging.error(message)
                    continue
                else:
                    raise SystemExit(message)

            filename = database.getlatest(db, row.ticker)
            if filename is None:
                logging.error("Missing file for %s", row.ticker)
                continue
            logging.info("Parsing file '%s' with '%s'", filename, row.issuer)

            if not hasattr(downloader, 'parse'):
                logging.error("Parser for %s is not implemented", row.ticker)
                continue

            # Parse the file.
            holdings = downloader.parse(filename)
            check_holdings(holdings)

        # Add parent ETF and fixup columns. (row=row binds the current row,
        # avoiding the late-binding closure pitfall.)
        holdings = add_missing_columns(holdings)
        holdings = holdings.create('etf', lambda _, row=row: row.ticker)
        holdings = holdings.create('account', lambda _, row=row: row.account)
        holdings = holdings.select(COLUMNS)

        # Convert fraction to dollar amount.
        dollar_amount = row.quantity * row.price
        holdings = (holdings
                    .create('amount',
                            lambda row, a=dollar_amount: row.fraction * a)
                    .delete(['fraction']))

        alltables.append(holdings)
    fulltable = table.concat(*alltables)

    # Aggregate the holdings.
    aggtable, annotable = graph.group(fulltable, args.debug_output)
    if args.agg_table:
        with open(args.agg_table, 'w') as outfile:
            table.write_csv(aggtable, outfile)

    # Remove the holdings whose aggregate sum is under a threshold.
    if args.threshold:
        filt_annotable = annotable.filter(
            lambda row: aggtable.rows[row.group].amount > args.threshold)
    else:
        # BUGFIX: filt_annotable was previously unbound when no --threshold
        # was given, crashing with NameError below.
        filt_annotable = annotable

    # Write out the full table.
    logging.info("Total amount from full holdings table: {:.2f}".format(
        numpy.sum(fulltable.array('amount'))))
    logging.info("Total amount from annotated holdings table: {:.2f}".format(
        numpy.sum(filt_annotable.array('amount'))))
    if args.full_table:
        with open(args.full_table, 'w') as outfile:
            table.write_csv(filt_annotable, outfile)

    # Cull out the tail of holdings for printing.
    tail = 0.90
    amount = aggtable.array('amount')
    total_amount = numpy.sum(amount)
    logging.info('Total: {:.2f}'.format(total_amount))
    cum_amount = numpy.cumsum(amount)
    headsize = len(amount[cum_amount < total_amount * tail])
    print(aggtable.head(headsize))
Beispiel #11
0
def add_missing_columns(tbl: Table) -> Table:
    """Add empty identifier columns to the table."""
    present = set(tbl.columns)
    for column in IDCOLUMNS:
        if column in present:
            continue
        tbl = tbl.create(column, lambda _: '')
    return tbl
Beispiel #12
0
def create_fraction_from_market_value(tbl: Table, column: str) -> Table:
    """Create a 'fraction' column computed from the market value column."""
    tbl = tbl.map(column, convert_dollar_amount)
    # Negative market values are clamped to zero, both in the total and in
    # each row's numerator.
    total_value = sum(max(0, value) for value in tbl.itervalues(column))

    def fraction(row):
        return max(0, getattr(row, column)) / total_value

    return tbl.create('fraction', fraction)
Beispiel #13
0
def HoldingsTable(rows):
    """Normalized extracted contents of an holdings file download."""
    columns = ['ticker', 'fraction', 'description']
    types = [str, float, str]
    return Table(columns, types, rows)
Beispiel #14
0
def parse_fixed_income(tbl: Table) -> Table:
    """Parse the Fixed income table."""
    tbl = tbl.create('asstype', lambda _: 'FixedIncome')
    tbl = tbl.create('ticker', lambda _: '')
    # Blank out '-' placeholders in the sedol column.
    tbl = tbl.update(
        'sedol', lambda row: row.sedol if row.sedol != '-' else '')
    return tbl.select(VALUES_COLUMNS)
Beispiel #15
0
def parse_equity(tbl: Table) -> Table:
    """Parse the Equity table."""
    tbl = tbl.create('asstype', lambda _: 'Equity')
    tbl = tbl.map('ticker', str.strip)
    return tbl.select(VALUES_COLUMNS)
Beispiel #16
0
def group(holdings: Table, debug_filename: str = None) -> Tuple[Table, Table]:
    """Group assets by similarity.

    Args:
      holdings: The full holdings table to group.
      debug_filename: Optional path; when given, each group is printed to it.
    Returns:
      A pair (aggtable, annotable): the aggregated per-group table ordered by
      descending amount, and the original holdings annotated with a 'group'
      index, ordered by (group, -amount).
    """

    # Compute the connected components.
    g = build_graph(holdings)
    cc = nx.connected_components(g)
    logging.info('Num connected components: %s',
                 nx.number_connected_components(g))

    # Process each component.
    counts = collections.defaultdict(int)
    debugfile = open(debug_filename, 'w') if debug_filename else None
    groups = []
    # BUGFIX: close the debug file even if an exception interrupts the loop
    # (it was previously closed only on the success path).
    try:
        for component in cc:
            # Separate out the rows and links: link nodes are tuples, row
            # nodes are everything else.
            rows = []
            links = []
            for c in component:
                # pylint: disable=unidiomatic-typecheck
                (links if type(c) is tuple else rows).append(c)
            counts[len(rows)] += 1
            groups.append(rows)

            # Print all groups to a test file.
            if debugfile:
                print_group(rows, links, debugfile)
    finally:
        if debugfile is not None:
            debugfile.close()

    # Groups of size 1 are unmatched; report the matched ratio.
    logging.info('Matched: {:%}'.format(1 - counts[1] / sum(counts.values())))
    logging.info('Items distribution (log-floored):')
    # Convert to log map.
    logcounts = collections.defaultdict(int)
    for numitems, count in sorted(counts.items()):
        lognumitems = int(math.pow(2, int(math.log2(numitems))))
        logcounts[lognumitems] += count
    for numitems, count in sorted(logcounts.items()):
        logging.info('   {:>3}~{:>3} items: {:10}'.format(
            numitems - 1, numitems, count))

    # Reduce the rows and produce an aggregated table.
    aggrows = []
    sorted_groups = sorted(groups,
                           key=lambda grows: -sum(row.amount for row in grows))
    for rows in sorted_groups:
        assert rows
        amount = sum(row.amount for row in rows)
        # Select the longest name. It seems to nearly always be the best
        # variant.
        names = sorted(set(row.name for row in rows), key=len, reverse=True)
        name = names[0]
        symbol = ','.join(sorted(set(row.ticker for row in rows
                                     if row.ticker)))
        asstype = ','.join(sorted(set(row.asstype for row in rows)))
        aggrows.append((symbol, asstype, name, amount))
    columns = ['symbol', 'asstype', 'name', 'amount']
    aggtable = (Table(columns, [str, str, str, float],
                      aggrows).order(lambda row: row.amount, asc=False))

    # Reproduce the original table, but with the row groups annotated this
    # time.
    annotation_map = {}
    for index, rows in enumerate(sorted_groups):
        for row in rows:
            annotation_map[row] = index
    annotable = (holdings
                 .create('group', annotation_map.__getitem__)
                 .order(lambda row: (row.group, -row.amount)))
    assert len(holdings) == len(annotable)

    return aggtable, annotable