예제 #1
0
def process_symbol(sec_name):
    """Build per-contract date windows for ``sec_name`` and fetch intraday data.

    The instrument's ``expiry_map`` metadata is turned into a frame indexed by
    contract name.  For every contract that is not listed in ``BAD_CONTRACTS``
    and whose window does not end at or before ``DEEP_PAST``, a
    ``(start_ts, end_ts, start_dt, end_dt)`` tuple is stored, where the ``*_ts``
    values are UTC-localized timestamps and the ``*_dt`` values are naive
    ``datetime`` objects.  The resulting ordered mapping is passed to
    ``get_all_intraday_data``.

    Side effect: every contract not in ``BAD_CONTRACTS`` is recorded in
    ``METADATA`` as belonging to ``sec_name`` (even if it is later skipped for
    being too old).

    :param sec_name: security name handed to ``isecurity.factory``.
    :return: None
    """
    print("Processing %s" % sec_name)
    instrument = isecurity.factory(sec_name)

    # Snapshot the instrument metadata into a plain dict.
    md_dict = {key: instrument.metadata[key] for key in instrument.metadata.keys()}

    # Create a frame from the expiry map, which will have contract names as the index
    expirations = pandas.Series(md_dict['expiry_map'])
    expirations = expirations.map(lambda x: pandas.Period(x, freq="B"))
    df = expirations.to_frame('expiration')
    df.index.name = 'contract'

    # Add a column for the inception dates
    # NOTE: Keeping this in case we want to write all contract prices as separate blobs in the future
    inceptions = md_dict.get('inception_map')
    if inceptions is not None:
        df.loc[:, 'inception'] = pandas.Series(inceptions).map(
            lambda x: pandas.Period(x, freq="B"))
    else:
        df.loc[:, 'inception'] = None

    # The look-back offset does not depend on the contract, so build it once.
    lookback = -BDAYS_PER_CONTRACT * offsets.BDay()

    # Gather start/end stats for all relevant contracts
    contracts_info = OrderedDict()
    for contract_name in df.index:
        if contract_name in BAD_CONTRACTS:
            continue

        METADATA[contract_name] = sec_name

        expiry = df.loc[contract_name, 'expiration']

        # The window ends one business day after expiry; drop contracts that
        # end at or before the earliest date of interest.
        end_p = indaux.apply_offset(expiry, offsets.BDay())
        if end_p <= DEEP_PAST:
            continue

        # NOTE: Use inception - 1 BDay if wanting to store all contract data in the future.
        # For now, we're just storing the last BDAYS_PER_CONTRACT business days of prices.
        start_p = max(indaux.apply_offset(expiry, lookback), DEEP_PAST)

        # Convert to timezone-aware timestamps and naive datetimes (for efficient use elsewhere).
        # Timestamp.to_pydatetime() replaces the deprecated/removed Timestamp.to_datetime().
        start_stamp = start_p.to_timestamp()
        end_stamp = end_p.to_timestamp()
        contracts_info[contract_name] = (
            start_stamp.tz_localize(pytz.UTC),
            end_stamp.tz_localize(pytz.UTC),
            start_stamp.to_pydatetime(),
            end_stamp.to_pydatetime(),
        )

    get_all_intraday_data(instrument, contracts_info)
예제 #2
0
def process_symbol(sec_name):
    """Build per-contract date windows for ``sec_name`` and fetch intraday data.

    The instrument's ``expiry_map`` metadata is turned into a frame indexed by
    contract name.  For every contract that is not listed in ``BAD_CONTRACTS``
    and whose window does not end at or before ``DEEP_PAST``, a
    ``(start_p, end_p, start_dt, end_dt)`` tuple is stored: the window as
    business-day periods plus the same bounds as naive ``datetime`` objects.
    The resulting ordered mapping is passed to ``get_all_intraday_data``.

    Side effect: every contract not in ``BAD_CONTRACTS`` is recorded in
    ``METADATA`` as belonging to ``sec_name`` (even if it is later skipped for
    being too old).

    :param sec_name: security name handed to ``isecurity.factory``.
    :return: None
    """
    print("Processing %s" % sec_name)
    instrument = isecurity.factory(sec_name)

    # Snapshot the instrument metadata into a plain dict.
    md_dict = {key: instrument.metadata[key] for key in instrument.metadata.keys()}

    # Create a frame from the expiry map, which will have contract names as the index
    expirations = pandas.Series(md_dict['expiry_map'])
    expirations = expirations.map(lambda x: pandas.Period(x, freq="B"))
    df = expirations.to_frame('expiration')
    df.index.name = 'contract'

    # Add a column for the inception dates
    # NOTE: Keeping this in case we want to write all contract prices as separate blobs in the future
    inceptions = md_dict.get('inception_map')
    if inceptions is not None:
        df.loc[:, 'inception'] = pandas.Series(inceptions).map(
            lambda x: pandas.Period(x, freq="B"))
    else:
        df.loc[:, 'inception'] = None

    # The look-back offset does not depend on the contract, so build it once.
    lookback = -BDAYS_PER_CONTRACT * offsets.BDay()

    # Gather start/end stats for all relevant contracts
    contracts_info = OrderedDict()
    for contract_name in df.index:
        if contract_name in BAD_CONTRACTS:
            continue

        METADATA[contract_name] = sec_name

        expiry = df.loc[contract_name, 'expiration']

        # The window ends one business day after expiry; drop contracts that
        # end at or before the earliest date of interest.
        end_p = indaux.apply_offset(expiry, offsets.BDay())
        if end_p <= DEEP_PAST:
            continue

        # NOTE: Use inception - 1 BDay if wanting to store all contract data in the future.
        # For now, we're just storing the last BDAYS_PER_CONTRACT business days of prices.
        start_p = max(indaux.apply_offset(expiry, lookback), DEEP_PAST)

        # Convert to naive datetimes (for efficient use elsewhere).  The
        # UTC-localized timestamps previously computed here were never used.
        # Timestamp.to_pydatetime() replaces the deprecated/removed Timestamp.to_datetime().
        start_dt = start_p.to_timestamp().to_pydatetime()
        end_dt = end_p.to_timestamp().to_pydatetime()
        contracts_info[contract_name] = (start_p, end_p, start_dt, end_dt)

    get_all_intraday_data(instrument, contracts_info)
예제 #3
0
def process_symbol(sec_name, through_date):
    """Refresh stored metadata, intraday and daily data for ``sec_name``.

    Steps, in order:

    1. Return early if ``METADATA.blacklist(sec_name)`` is truthy, or if the
       security's recorded "current through" value is already at or past
       ``through_date``.
    2. Write the instrument metadata as a blob via ``write_blob``.
    3. Build a ``(start_p, end_p, start_dt, end_dt)`` window for each contract
       that is not in ``BAD_CONTRACTS`` and does not end before the relevance
       threshold.
    4. Write intraday data when ``DO_INTRADAY`` is set and
       ``check_intraday(sec_name)`` passes.
    5. Write concatenated daily close prices when ``DO_DAILY`` is set and at
       least one contract returned prices.
    6. Record ``through_date`` as the new "current through" value only when
       both ``DO_DAILY`` and ``DO_INTRADAY`` are enabled.

    Side effect: every contract not in ``BAD_CONTRACTS`` is registered in
    ``METADATA.contract_map`` under ``sec_name``.

    :param sec_name: security name handed to ``isecurity.factory``.
    :param through_date: date the data should be current through; compared
        against ``METADATA.current_through(sec_name)``.
    :return: None
    """
    if METADATA.blacklist(sec_name):
        print("Skipping %s because it is blacklisted" % sec_name)
        return

    current_through = METADATA.current_through(sec_name)
    if current_through and (current_through >= through_date):
        print("Skipping %s because it is already current through %s" %
              (sec_name, current_through))
        return

    print("Processing %s" % sec_name)
    millis = current_millis()
    instrument = isecurity.factory(sec_name)

    # Snapshot the instrument metadata into a plain dict and persist it.
    md_dict = {key: instrument.metadata[key] for key in instrument.metadata.keys()}
    write_blob(sec_name, None, price.PriceData.METADATA, md_dict)

    # Create a frame from the expiry map, which will have contract names as the index
    expirations = pandas.Series(md_dict['expiry_map'])
    expirations = expirations.map(lambda x: pandas.Period(x, freq="B"))
    df = expirations.to_frame('expiration')
    df.index.name = 'contract'

    # Add a column for the inception dates
    # NOTE: Keeping this in case we want to write all contract prices as separate blobs in the future
    inceptions = md_dict.get('inception_map')
    if inceptions is not None:
        df.loc[:, 'inception'] = pandas.Series(inceptions).map(
            lambda x: pandas.Period(x, freq="B"))
    else:
        df.loc[:, 'inception'] = None

    # If doing an update, find the threshold that a contract needs to expire on or after to be relevant
    threshold_p = BACKTEST_INDEX[0]  # type: pandas.Period
    if current_through:
        today_p = pandas.Period(datetime.date.today(), freq='D')
        correction_p = (today_p - METADATA.correction_days(sec_name)).asfreq('B')
        current_through_p = pandas.Period(current_through, freq='B')
        threshold_p = max(threshold_p, min(correction_p, current_through_p))

    # Gather start/end stats for all relevant contracts
    contracts_info = OrderedDict()
    for contract_name in df.index:
        if contract_name in BAD_CONTRACTS:
            continue

        METADATA.contract_map[contract_name] = sec_name

        expiry = df.loc[contract_name, 'expiration']

        # Discard contracts that expire before the beginning of the period
        end_p = indaux.apply_offset(expiry, offsets.BDay())
        if end_p < threshold_p:
            continue

        # NOTE: Use inception - 1 BDay if wanting to store all contract data in the future.
        # For now, we're just storing the last bdays_per_contract business days of prices.
        lookback = -METADATA.bdays_per_contract(sec_name) * offsets.BDay()
        start_p = max(BACKTEST_INDEX[0], indaux.apply_offset(expiry, lookback))

        # Convert to naive datetimes (for efficient use elsewhere).
        # Timestamp.to_pydatetime() replaces the deprecated/removed Timestamp.to_datetime().
        start_dt = start_p.to_timestamp().to_pydatetime()
        end_dt = end_p.to_timestamp().to_pydatetime()
        contracts_info[contract_name] = (start_p, end_p, start_dt, end_dt)

    if DO_INTRADAY and check_intraday(sec_name):
        write_intraday_data(sec_name, contracts_info, current_through)

    if DO_DAILY:
        # Gather the security's close prices one contract at a time, to be
        # concatenated below.  items() works on both Python 2 and 3, unlike
        # the Python 2-only iteritems().
        close_dfs = []
        for contract_name, (_, _, start_dt, end_dt) in contracts_info.items():
            contract_close_df = get_close_prices(contract_name, start_dt, end_dt)
            if contract_close_df is not None:
                close_dfs.append(contract_close_df)

        if close_dfs:
            security_close_df = pandas.concat(close_dfs)
            # noinspection PyTypeChecker
            write_daily_data(sec_name, security_close_df, current_through)

    # Only indicate that we are "current" if neither daily or intraday were shut off
    if DO_DAILY and DO_INTRADAY:
        METADATA.current_through(sec_name, through_date)

    log_millis(millis, "Total time: ")
예제 #4
0
def _is_missing_date(value):
    """Return a truthy value when ``value`` is ``None`` or a numeric NaN."""
    return (value is None) or (isinstance(value, numbers.Number) and numpy.isnan(value))


def _report_rows(this_contract, description, rows):
    """Print how many ``rows`` exist, and how many have positive volume, if any."""
    if len(rows):
        with_volume = rows[rows.Volume > 0.0]
        print("%s has %d %s, %d with positive volumes"
              % (this_contract, len(rows), description, len(with_volume)))


def analyze_contract(this_contract, inception, expiry, close, volume):
    """Print diagnostics about suspicious prices for a single contract.

    Reports, via ``print``, each of the following when at least one row matches
    (always together with how many of those rows have positive volume):

    * zero close prices dated within [inception, expiry];
    * prices dated on or before ``expiry - ALMOST_10_YRS`` ("too old");
    * prices dated on or after ``inception + ALMOST_10_YRS`` ("too recent");
    * remaining prices dated before inception;
    * remaining prices dated after expiry.

    A missing ``expiry`` (``None`` or NaN) is replaced with ``TODAY``.  A
    missing ``inception`` is replaced with ``expiry - SEVEN_YRS`` when
    ``GUESS_INCEPTION`` is set, otherwise with ``DEEP_PAST``.

    The messages use the single-argument ``print(...)`` form so the function
    parses on Python 3 and emits the same text on Python 2.

    :param this_contract: contract name used in the printed messages.
    :param inception: contract inception date, or ``None``/NaN if unknown.
    :param expiry: contract expiry date, or ``None``/NaN if unknown.
    :param close: close prices; presumably a date-indexed Series (its index
        becomes the ``Date`` column).
    :param volume: volumes, aligned with ``close`` by index.
    :return: None
    """
    if _is_missing_date(expiry):
        print("%s has no expiry date" % (this_contract,))
        expiry = TODAY
    if _is_missing_date(inception):
        print("%s has no inception date" % (this_contract,))
        if GUESS_INCEPTION:
            inception = indaux.apply_offset(expiry, -SEVEN_YRS)
        else:
            inception = DEEP_PAST

    # Scalars are broadcast against the index shared by close/volume.
    contract_df = pandas.DataFrame.from_dict(
        OrderedDict((
            ('Contract', this_contract),
            ('Inception', inception),
            ('Expiry', expiry),
            ('Close', close),
            ('Volume', volume),
        ))
    )
    contract_df.index.name = 'Date'
    contract_df = contract_df.reset_index()

    before_inception = contract_df.Date < contract_df.Inception
    after_expiry = contract_df.Date > contract_df.Expiry

    # Check for zeroes inside the [Inception, Expiry] range
    inside_df = contract_df[(contract_df.Date >= contract_df.Inception) &
                            (contract_df.Date <= contract_df.Expiry)]
    _report_rows(this_contract, "zero prices", inside_df[inside_df.Close == 0.0])

    # Reduce it to those rows that have prices with dates outside of the (Inception, Expiry) range
    outside_df = contract_df[before_inception | after_expiry]
    if len(outside_df) <= 0:
        return

    # Find the really old ones indicative of a one/two digit year problem
    too_old_date = indaux.apply_offset(expiry, -ALMOST_10_YRS)
    _report_rows(this_contract, "too old prices",
                 outside_df[outside_df.Date <= too_old_date])

    # Find the really recent ones indicative of a one/two digit year problem
    too_recent_date = indaux.apply_offset(inception, ALMOST_10_YRS)
    _report_rows(this_contract, "too recent prices",
                 outside_df[outside_df.Date >= too_recent_date])

    # Get rid of those egregious outliers
    outside_df = outside_df[(outside_df.Date > too_old_date) &
                            (outside_df.Date < too_recent_date)]

    # Report on those before inception date
    _report_rows(this_contract, "prices before inception",
                 outside_df[outside_df.Date < outside_df.Inception])

    # Report on those after expiry date
    _report_rows(this_contract, "prices after expiry",
                 outside_df[outside_df.Date > outside_df.Expiry])
예제 #5
0
import time

def current_millis():
    """Return the current ``time.time()`` value as whole milliseconds."""
    return int(round(time.time() * 1000))


# Business-day period index from BACKTEST_START through today's date.
BACKTEST_START = '1990-01-03'
BACKTEST_END = str(datetime.date.today())
BACKTEST_INDEX = pandas.period_range(start=BACKTEST_START, end=BACKTEST_END, freq='B')

# When False, a missing inception date falls back to DEEP_PAST instead of being guessed.
GUESS_INCEPTION = False

# Offsets expressed in business days: 7 * 260 and 9 * 260 + 220 respectively
# (presumably ~260 business days per year).
SEVEN_YRS = ((7 * 260) * offsets.BDay())
ALMOST_10_YRS = (((9 * 260) + 220) * offsets.BDay())

# First and last periods of the backtest index, plus a far-future bound
# ALMOST_10_YRS after TODAY.
DEEP_PAST = BACKTEST_INDEX[0]
TODAY = BACKTEST_INDEX[-1]
DEEP_FUTURE = indaux.apply_offset(TODAY, ALMOST_10_YRS)

COMDTYS = (
    "AA_COMDTY",
    "BO_COMDTY",
    "CC_COMDTY",
    "CL_COMDTY",
    "CN_COMDTY",
    "CO_COMDTY",
    "CT_COMDTY",
    "CU_COMDTY",
    "C_COMDTY",
    "DU_COMDTY",
    "DW_COMDTY",
    "FN_COMDTY",
    "FV_COMDTY",
예제 #6
0
def process_symbol(sec_name, through_date):
    """Refresh stored metadata, intraday and daily data for ``sec_name``.

    Steps, in order:

    1. Return early if ``METADATA.blacklist(sec_name)`` is truthy, or if the
       security's recorded "current through" value is already at or past
       ``through_date``.
    2. Write the instrument metadata as a blob via ``write_blob``.
    3. Build a ``(start_p, end_p, start_dt, end_dt)`` window for each contract
       that is not in ``BAD_CONTRACTS`` and does not end before the relevance
       threshold.
    4. Write intraday data when ``DO_INTRADAY`` is set and
       ``check_intraday(sec_name)`` passes.
    5. Write concatenated daily close prices when ``DO_DAILY`` is set and at
       least one contract returned prices.
    6. Record ``through_date`` as the new "current through" value only when
       both ``DO_DAILY`` and ``DO_INTRADAY`` are enabled.

    Side effect: every contract not in ``BAD_CONTRACTS`` is registered in
    ``METADATA.contract_map`` under ``sec_name``.

    :param sec_name: security name handed to ``isecurity.factory``.
    :param through_date: date the data should be current through; compared
        against ``METADATA.current_through(sec_name)``.
    :return: None
    """
    if METADATA.blacklist(sec_name):
        print("Skipping %s because it is blacklisted" % sec_name)
        return

    current_through = METADATA.current_through(sec_name)
    if current_through and (current_through >= through_date):
        print("Skipping %s because it is already current through %s" % (sec_name, current_through))
        return

    print("Processing %s" % sec_name)
    millis = current_millis()
    instrument = isecurity.factory(sec_name)

    # Snapshot the instrument metadata into a plain dict and persist it.
    md_dict = {key: instrument.metadata[key] for key in instrument.metadata.keys()}
    write_blob(sec_name, None, price.PriceData.METADATA, md_dict)

    # Create a frame from the expiry map, which will have contract names as the index
    expirations = pandas.Series(md_dict['expiry_map'])
    expirations = expirations.map(lambda x: pandas.Period(x, freq="B"))
    df = expirations.to_frame('expiration')
    df.index.name = 'contract'

    # Add a column for the inception dates
    # NOTE: Keeping this in case we want to write all contract prices as separate blobs in the future
    inceptions = md_dict.get('inception_map')
    if inceptions is not None:
        df.loc[:, 'inception'] = pandas.Series(inceptions).map(lambda x: pandas.Period(x, freq="B"))
    else:
        df.loc[:, 'inception'] = None

    # If doing an update, find the threshold that a contract needs to expire on or after to be relevant
    threshold_p = BACKTEST_INDEX[0]  # type: pandas.Period
    if current_through:
        today_p = pandas.Period(datetime.date.today(), freq='D')
        correction_p = (today_p - METADATA.correction_days(sec_name)).asfreq('B')
        current_through_p = pandas.Period(current_through, freq='B')
        threshold_p = max(threshold_p, min(correction_p, current_through_p))

    # Gather start/end stats for all relevant contracts
    contracts_info = OrderedDict()
    for contract_name in df.index:
        if contract_name in BAD_CONTRACTS:
            continue

        METADATA.contract_map[contract_name] = sec_name

        expiry = df.loc[contract_name, 'expiration']

        # Discard contracts that expire before the beginning of the period
        end_p = indaux.apply_offset(expiry, offsets.BDay())
        if end_p < threshold_p:
            continue

        # NOTE: Use inception - 1 BDay if wanting to store all contract data in the future.
        # For now, we're just storing the last bdays_per_contract business days of prices.
        lookback = -METADATA.bdays_per_contract(sec_name) * offsets.BDay()
        start_p = max(BACKTEST_INDEX[0], indaux.apply_offset(expiry, lookback))

        # Convert to naive datetimes (for efficient use elsewhere).
        # Timestamp.to_pydatetime() replaces the deprecated/removed Timestamp.to_datetime().
        start_dt = start_p.to_timestamp().to_pydatetime()
        end_dt = end_p.to_timestamp().to_pydatetime()
        contracts_info[contract_name] = (start_p, end_p, start_dt, end_dt)

    if DO_INTRADAY and check_intraday(sec_name):
        write_intraday_data(sec_name, contracts_info, current_through)

    if DO_DAILY:
        # Gather the security's close prices one contract at a time, to be
        # concatenated below.  items() works on both Python 2 and 3, unlike
        # the Python 2-only iteritems().
        close_dfs = []
        for contract_name, (_, _, start_dt, end_dt) in contracts_info.items():
            contract_close_df = get_close_prices(contract_name, start_dt, end_dt)
            if contract_close_df is not None:
                close_dfs.append(contract_close_df)

        if close_dfs:
            security_close_df = pandas.concat(close_dfs)
            # noinspection PyTypeChecker
            write_daily_data(sec_name, security_close_df, current_through)

    # Only indicate that we are "current" if neither daily or intraday were shut off
    if DO_DAILY and DO_INTRADAY:
        METADATA.current_through(sec_name, through_date)

    log_millis(millis, "Total time: ")