Exemple #1
0
def _split_symbol_mappings(df):
    """Separate the symbol: sid mapping columns from the raw asset data.

    Parameters
    ----------
    df : pd.DataFrame
        Raw frame that may carry several rows for each symbol: sid pair.

    Returns
    -------
    asset_info : pd.DataFrame
        The asset info with one row per asset, built by applying
        ``_check_asset_group`` to the rows grouped on index level 0.
    symbol_mappings : pd.DataFrame
        Only the ``mapping_columns`` of ``df``. The index is the sid and
        the columns are symbol, start_date, and end_date.

    Raises
    ------
    ValueError
        As soon as one symbol is found whose date ranges intersect.
    """
    columns = list(mapping_columns)
    symbol_frame = df[columns]

    for ticker in symbol_frame.symbol.unique():
        rows = symbol_frame[symbol_frame.symbol == ticker]
        date_pairs = zip(rows.start_date, rows.end_date)
        overlaps = list(intersecting_ranges(map(from_tuple, date_pairs)))
        if not overlaps:
            continue

        # the first symbol with colliding ranges aborts the whole split
        formatted = list(map(_format_range, overlaps))
        raise ValueError(
            'Ambiguous ownership of %r, multiple companies held this'
            ' ticker over the following ranges:\n%s' % (ticker, formatted),
        )

    asset_info = df.groupby(level=0).apply(_check_asset_group)
    return asset_info, df[columns]
def _split_symbol_mappings(df):
    """Split out the symbol: sid mappings from the raw data.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe with multiple rows for each symbol: sid pair.

    Returns
    -------
    asset_info : pd.DataFrame
        The asset info with one row per asset.
    symbol_mappings : pd.DataFrame
        The dataframe of just symbol: sid mappings. The index will be
        the sid, then there will be three columns: symbol, start_date, and
        end_date.
    """
    mappings = df[list(mapping_columns)]
    ambigious = {}
    for symbol in mappings.symbol.unique():
        persymbol = mappings[mappings.symbol == symbol]
        intersections = list(intersecting_ranges(map(
            from_tuple,
            zip(persymbol.start_date, persymbol.end_date),
        )))
        if intersections:
            ambigious[symbol] = (
                intersections,
                persymbol[['start_date', 'end_date']].astype('datetime64[ns]'),
            )

    if ambigious:
        raise ValueError(
            'Ambiguous ownership for %d symbol%s, multiple assets held the'
            ' following symbols:\n%s' % (
                len(ambigious),
                '' if len(ambigious) == 1 else 's',
                '\n'.join(
                    '%s:\n  intersections: %s\n  %s' % (
                        symbol,
                        tuple(map(_format_range, intersections)),
                        # indent the dataframe string
                        '\n  '.join(str(df).splitlines()),
                    )
                    for symbol, (intersections, df) in sorted(
                        ambigious.items(),
                        key=first,
                    ),
                ),
            )
        )
    return (
        df.groupby(level=0).apply(_check_asset_group),
        df[list(mapping_columns)],
    )
Exemple #3
0
 def check_intersections(persymbol):
     """Record ``persymbol`` in ``ambigious`` when its date ranges intersect.

     Nothing is stored when ``intersecting_ranges`` yields no result for
     the ``start_date``/``end_date`` pairs.  Otherwise the entry is keyed
     by ``persymbol.name`` and holds the intersections together with the
     indented text of the date columns.
     """
     date_pairs = zip(persymbol.start_date, persymbol.end_date)
     overlaps = list(intersecting_ranges(map(from_tuple, date_pairs)))
     if not overlaps:
         return

     dates = persymbol[['start_date', 'end_date']]
     dates = dates.astype('datetime64[ns]')
     # The text is produced immediately (with every line indented) rather
     # than on demand: ``persymbol`` is a view and in pandas 0.22
     # ``astype`` does not copy the index correctly.
     indented = '\n  '.join(str(dates).splitlines())
     ambigious[persymbol.name] = overlaps, indented
Exemple #4
0
 def check_intersections(persymbol):
     """Add an ``ambigious`` entry for ``persymbol`` if its ranges collide.

     The ranges are built from the ``start_date`` and ``end_date`` columns.
     When any of them intersect, ``ambigious[persymbol.name]`` is set to
     the pair ``(intersections, indented date table text)``.
     """
     ranges = map(
         from_tuple,
         zip(persymbol.start_date, persymbol.end_date),
     )
     found = list(intersecting_ranges(ranges))
     if found:
         as_datetimes = persymbol[['start_date', 'end_date']].astype(
             'datetime64[ns]',
         )
         # Stringify and indent eagerly: ``persymbol`` is a view, and
         # pandas 0.22's ``astype`` does not copy the index correctly, so
         # the frame itself must not be kept for later rendering.
         lines = str(as_datetimes).splitlines()
         ambigious[persymbol.name] = found, '\n  '.join(lines)