def process_symbol(sec_name):
    """Build per-contract date windows for a security and fetch intraday data.

    Every contract in the security's ``expiry_map`` metadata that is not
    listed in ``BAD_CONTRACTS`` is recorded in ``METADATA`` as belonging to
    ``sec_name``.  For each of them a window is computed: it ends at the
    expiry shifted by ``offsets.BDay()`` and starts at the expiry shifted by
    ``-BDAYS_PER_CONTRACT * offsets.BDay()`` (never earlier than
    ``DEEP_PAST``).  Contracts whose window ends on or before ``DEEP_PAST``
    are left out.  The resulting mapping is passed to
    ``get_all_intraday_data``.

    :param sec_name: security name handed to ``isecurity.factory``.
    :return: None.
    """
    print("Processing %s" % sec_name)
    instrument = isecurity.factory(sec_name)

    # Take a plain-dict snapshot of the instrument metadata.
    metadata = instrument.metadata
    md_dict = {key: metadata[key] for key in metadata.keys()}

    # Create a frame from the expiry map, which will have contract names as the index
    df = pandas.Series(md_dict['expiry_map'])
    df = df.map(lambda x: pandas.Period(x, freq="B"))
    df = df.to_frame('expiration')
    df.index.name = 'contract'

    # Add a column for the inception dates
    # NOTE: Keeping this in case we want to write all contract prices as separate blobs in the future
    inceptions = md_dict.get('inception_map', None)
    if inceptions is not None:
        contract_inception = pandas.Series(inceptions)
        contract_inception = contract_inception.map(
            lambda x: pandas.Period(x, freq="B"))
        df.loc[:, 'inception'] = contract_inception
    else:
        df.loc[:, 'inception'] = None

    # Gather start/end stats for all relevant contracts
    contracts_info = OrderedDict()
    for contract_name in df.index:
        if contract_name in BAD_CONTRACTS:
            continue
        # Recorded before the DEEP_PAST filter, so filtered-out contracts
        # are still mapped to their security.
        METADATA[contract_name] = sec_name
        expiry = df.loc[contract_name, 'expiration']
        end_p = indaux.apply_offset(expiry, offsets.BDay())
        if end_p <= DEEP_PAST:
            continue

        # NOTE: Use inception - 1 BDay if wanting to store all contract data in the future.
        # For now, we're just storing the last BDAYS_PER_CONTRACT business days of prices.
        start_p = max(
            indaux.apply_offset(expiry, -BDAYS_PER_CONTRACT * offsets.BDay()),
            DEEP_PAST)

        # Convert to timezone-aware timestamps and naive datetimes (for efficient use elsewhere).
        # Timestamp.to_pydatetime() replaces the deprecated Timestamp.to_datetime().
        start_stamp = start_p.to_timestamp()
        end_stamp = end_p.to_timestamp()
        contracts_info[contract_name] = (
            start_stamp.tz_localize(pytz.UTC),
            end_stamp.tz_localize(pytz.UTC),
            start_stamp.to_pydatetime(),
            end_stamp.to_pydatetime(),
        )

    get_all_intraday_data(instrument, contracts_info)
def process_symbol(sec_name):
    """Build per-contract date windows for a security and fetch intraday data.

    Every contract in the security's ``expiry_map`` metadata that is not
    listed in ``BAD_CONTRACTS`` is recorded in ``METADATA`` as belonging to
    ``sec_name``.  For each of them a window is computed: it ends at the
    expiry shifted by ``offsets.BDay()`` and starts at the expiry shifted by
    ``-BDAYS_PER_CONTRACT * offsets.BDay()`` (never earlier than
    ``DEEP_PAST``).  Contracts whose window ends on or before ``DEEP_PAST``
    are left out.  Each kept contract maps to
    ``(start_period, end_period, start_datetime, end_datetime)`` and the
    mapping is passed to ``get_all_intraday_data``.

    :param sec_name: security name handed to ``isecurity.factory``.
    :return: None.
    """
    print("Processing %s" % sec_name)
    instrument = isecurity.factory(sec_name)

    # Take a plain-dict snapshot of the instrument metadata.
    metadata = instrument.metadata
    md_dict = {key: metadata[key] for key in metadata.keys()}

    # Create a frame from the expiry map, which will have contract names as the index
    df = pandas.Series(md_dict['expiry_map'])
    df = df.map(lambda x: pandas.Period(x, freq="B"))
    df = df.to_frame('expiration')
    df.index.name = 'contract'

    # Add a column for the inception dates
    # NOTE: Keeping this in case we want to write all contract prices as separate blobs in the future
    inceptions = md_dict.get('inception_map', None)
    if inceptions is not None:
        contract_inception = pandas.Series(inceptions)
        contract_inception = contract_inception.map(lambda x: pandas.Period(x, freq="B"))
        df.loc[:, 'inception'] = contract_inception
    else:
        df.loc[:, 'inception'] = None

    # Gather start/end stats for all relevant contracts
    contracts_info = OrderedDict()
    for contract_name in df.index:
        if contract_name in BAD_CONTRACTS:
            continue
        # Recorded before the DEEP_PAST filter, so filtered-out contracts
        # are still mapped to their security.
        METADATA[contract_name] = sec_name
        expiry = df.loc[contract_name, 'expiration']
        end_p = indaux.apply_offset(expiry, offsets.BDay())
        if end_p <= DEEP_PAST:
            continue

        # NOTE: Use inception - 1 BDay if wanting to store all contract data in the future.
        # For now, we're just storing the last BDAYS_PER_CONTRACT business days of prices.
        start_p = max(indaux.apply_offset(expiry, -BDAYS_PER_CONTRACT * offsets.BDay()), DEEP_PAST)

        # Convert to naive datetimes (for efficient use elsewhere).  Only the
        # periods and the naive datetimes are stored, so no timezone-aware
        # timestamps are built.  Timestamp.to_pydatetime() replaces the
        # deprecated Timestamp.to_datetime().
        start_dt = start_p.to_timestamp().to_pydatetime()
        end_dt = end_p.to_timestamp().to_pydatetime()
        contracts_info[contract_name] = (start_p, end_p, start_dt, end_dt)

    get_all_intraday_data(instrument, contracts_info)
def process_symbol(sec_name, through_date):
    """Refresh the stored metadata, intraday and daily data for one security.

    Nothing is done when ``METADATA.blacklist(sec_name)`` is truthy, or when
    ``METADATA.current_through(sec_name)`` is already at or past
    ``through_date``.  Otherwise the function:

    * writes the instrument metadata as a blob via ``write_blob``;
    * computes, for every contract in ``expiry_map`` that is not in
      ``BAD_CONTRACTS`` and whose window ends at or after the update
      threshold, a ``(start_period, end_period, start_datetime,
      end_datetime)`` tuple;
    * writes intraday data when ``DO_INTRADAY`` and
      ``check_intraday(sec_name)`` are both truthy;
    * when ``DO_DAILY`` is set, concatenates the per-contract close prices
      and writes them with ``write_daily_data``;
    * marks the security current through ``through_date`` only when both
      ``DO_DAILY`` and ``DO_INTRADAY`` are enabled.

    :param sec_name: security name handed to ``isecurity.factory``.
    :param through_date: date the data should be current through; compared
        against ``METADATA.current_through(sec_name)``.
    :return: None.
    """
    if METADATA.blacklist(sec_name):
        print("Skipping %s because it is blacklisted" % sec_name)
        return

    current_through = METADATA.current_through(sec_name)
    if current_through and (current_through >= through_date):
        print("Skipping %s because it is already current through %s" % (sec_name, current_through))
        return

    print("Processing %s" % sec_name)
    millis = current_millis()
    instrument = isecurity.factory(sec_name)

    # Take a plain-dict snapshot of the instrument metadata and persist it.
    metadata = instrument.metadata
    md_dict = {key: metadata[key] for key in metadata.keys()}
    write_blob(sec_name, None, price.PriceData.METADATA, md_dict)

    # Create a frame from the expiry map, which will have contract names as the index
    df = pandas.Series(md_dict['expiry_map'])
    df = df.map(lambda x: pandas.Period(x, freq="B"))
    df = df.to_frame('expiration')
    df.index.name = 'contract'

    # Add a column for the inception dates
    # NOTE: Keeping this in case we want to write all contract prices as separate blobs in the future
    inceptions = md_dict.get('inception_map', None)
    if inceptions is not None:
        contract_inception = pandas.Series(inceptions)
        contract_inception = contract_inception.map(
            lambda x: pandas.Period(x, freq="B"))
        df.loc[:, 'inception'] = contract_inception
    else:
        df.loc[:, 'inception'] = None

    # If doing an update, find the threshold that a contract needs to expire on or after to be relevant
    threshold_p = BACKTEST_INDEX[0]  # type: pandas.Period
    if current_through:
        # Today minus the security's correction days, as a business-day period.
        correction_p = (pandas.Period(datetime.date.today(), freq='D') -
                        METADATA.correction_days(sec_name)).asfreq('B')
        current_through_p = pandas.Period(current_through, freq='B')
        threshold_p = max(threshold_p, min(correction_p, current_through_p))

    # Gather start/end stats for all relevant contracts
    contracts_info = OrderedDict()
    for contract_name in df.index:
        if contract_name in BAD_CONTRACTS:
            continue
        # Recorded before the threshold filter, so skipped contracts are
        # still mapped to their security.
        METADATA.contract_map[contract_name] = sec_name
        expiry = df.loc[contract_name, 'expiration']

        # Discard contracts that expire before the beginning of the period
        end_p = indaux.apply_offset(expiry, offsets.BDay())
        if end_p < threshold_p:
            continue

        # NOTE: Use inception - 1 BDay if wanting to store all contract data in the future.
        # For now, we're just storing the last bdays_per_contract business days of prices.
        start_p = max(
            BACKTEST_INDEX[0],
            indaux.apply_offset(
                expiry, -METADATA.bdays_per_contract(sec_name) * offsets.BDay()))

        # Convert to naive datetimes (for efficient use elsewhere).
        # Timestamp.to_pydatetime() replaces the deprecated Timestamp.to_datetime().
        start_dt = start_p.to_timestamp().to_pydatetime()
        end_dt = end_p.to_timestamp().to_pydatetime()
        contracts_info[contract_name] = (start_p, end_p, start_dt, end_dt)

    if DO_INTRADAY and check_intraday(sec_name):
        write_intraday_data(sec_name, contracts_info, current_through)

    if DO_DAILY:
        # Gather the security's close prices one contract at a time, to be
        # concatenated below.  .items() works on both Python 2 and 3.
        close_dfs = []
        for contract_name, (_start_p, _end_p, start_dt, end_dt) in contracts_info.items():
            contract_close_df = get_close_prices(contract_name, start_dt, end_dt)
            if contract_close_df is not None:
                close_dfs.append(contract_close_df)

        if close_dfs:
            security_close_df = pandas.concat(close_dfs)
            # noinspection PyTypeChecker
            write_daily_data(sec_name, security_close_df, current_through)

    # Only indicate that we are "current" if neither daily or intraday were shut off
    if DO_DAILY and DO_INTRADAY:
        METADATA.current_through(sec_name, through_date)

    log_millis(millis, "Total time: ")
def analyze_contract(this_contract, inception, expiry, close, volume):
    """Print diagnostics about suspicious prices in one contract's history.

    The scalars and the ``close``/``volume`` data are combined into a single
    frame whose index becomes a ``Date`` column.  The function then reports,
    each with the number of rows that also have a positive volume:

    * zero close prices dated within ``[inception, expiry]``;
    * prices dated at or before ``expiry`` shifted back by ``ALMOST_10_YRS``
      ("too old");
    * prices dated at or after ``inception`` shifted forward by
      ``ALMOST_10_YRS`` ("too recent");
    * of the remaining out-of-range rows, those before inception and those
      after expiry.

    A missing ``expiry`` (``None`` or a NaN number) is replaced by ``TODAY``.
    A missing ``inception`` is replaced by ``expiry`` shifted back by
    ``SEVEN_YRS`` when ``GUESS_INCEPTION`` is set, otherwise by
    ``DEEP_PAST``.

    :param this_contract: contract name used in the printed messages.
    :param inception: contract inception date, or ``None``/NaN if unknown.
    :param expiry: contract expiry date, or ``None``/NaN if unknown.
    :param close: close prices; presumably a date-indexed pandas Series
        (TODO confirm against the caller).
    :param volume: volumes; presumably indexed like ``close`` (TODO confirm).
    :return: None; all findings are printed.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    if _is_missing(expiry):
        print("%s has no expiry date" % (this_contract,))
        expiry = TODAY

    if _is_missing(inception):
        print("%s has no inception date" % (this_contract,))
        if GUESS_INCEPTION:
            inception = indaux.apply_offset(expiry, -SEVEN_YRS)
        else:
            inception = DEEP_PAST

    # Scalars are broadcast against the index of close/volume.
    contract_df = pandas.DataFrame.from_dict(
        OrderedDict((
            ('Contract', this_contract),
            ('Inception', inception),
            ('Expiry', expiry),
            ('Close', close),
            ('Volume', volume),
        ))
    )
    contract_df.index.name = 'Date'
    contract_df = contract_df.reset_index()

    before_inception = contract_df.Date < contract_df.Inception
    after_expiry = contract_df.Date > contract_df.Expiry

    # Check for zeroes
    inside_df = contract_df[~before_inception & ~after_expiry]
    _report_flagged_prices(
        this_contract, inside_df[inside_df.Close == 0.0], "zero prices")

    # Reduce it to those rows that have prices with dates outside of the (Inception, Expiry) range
    outside_df = contract_df[before_inception | after_expiry]
    if len(outside_df) <= 0:
        return

    # Find the really old ones indicative of a one/two digit year problem
    too_old_date = indaux.apply_offset(expiry, -ALMOST_10_YRS)
    _report_flagged_prices(
        this_contract, outside_df[outside_df.Date <= too_old_date], "too old prices")

    # Find the really recent ones indicative of a one/two digit year problem
    too_recent_date = indaux.apply_offset(inception, ALMOST_10_YRS)
    _report_flagged_prices(
        this_contract, outside_df[outside_df.Date >= too_recent_date], "too recent prices")

    # Get rid of those egregious outliers
    outside_df = outside_df[(outside_df.Date > too_old_date) & (outside_df.Date < too_recent_date)]

    # Report on those before inception date
    _report_flagged_prices(
        this_contract, outside_df[outside_df.Date < outside_df.Inception],
        "prices before inception")

    # Report on those after expiry date
    _report_flagged_prices(
        this_contract, outside_df[outside_df.Date > outside_df.Expiry],
        "prices after expiry")


def _is_missing(value):
    """Return True when *value* is None or a numeric NaN."""
    return (value is None) or (isinstance(value, numbers.Number) and numpy.isnan(value))


def _report_flagged_prices(contract, rows, description):
    """Print how many *rows* were flagged and how many of them have volume > 0.

    Prints nothing when *rows* is empty.
    """
    if len(rows):
        with_volume = rows[rows.Volume > 0.0]
        print("%s has %d %s, %d with positive volumes"
              % (contract, len(rows), description, len(with_volume)))
import time

# Current wall-clock time (time.time()) rounded to whole milliseconds.
current_millis = lambda: int(round(time.time() * 1000))

# Backtest window: a fixed start date through today's date, as a
# business-day ('B') PeriodIndex.
BACKTEST_START = '1990-01-03'
BACKTEST_END = str(datetime.date.today())
BACKTEST_INDEX = pandas.period_range(start=BACKTEST_START, end=BACKTEST_END, freq='B')

# When True, a missing inception date is guessed from the expiry date;
# when False, DEEP_PAST is used instead.
GUESS_INCEPTION = False

# Business-day offsets: 7 * 260 and 9 * 260 + 220 business days respectively.
SEVEN_YRS = ((7 * 260) * offsets.BDay())
ALMOST_10_YRS = (((9 * 260) + 220) * offsets.BDay())

# First and last periods of the backtest index, plus TODAY shifted forward
# by ALMOST_10_YRS.
DEEP_PAST = BACKTEST_INDEX[0]
TODAY = BACKTEST_INDEX[-1]
DEEP_FUTURE = indaux.apply_offset(TODAY, ALMOST_10_YRS)

# Security names ending in "_COMDTY".
# NOTE(review): the remainder of this tuple is not visible in this view.
COMDTYS = (
    "AA_COMDTY", "BO_COMDTY", "CC_COMDTY", "CL_COMDTY", "CN_COMDTY",
    "CO_COMDTY", "CT_COMDTY", "CU_COMDTY", "C_COMDTY", "DU_COMDTY",
    "DW_COMDTY", "FN_COMDTY", "FV_COMDTY",
def process_symbol(sec_name, through_date):
    """Refresh the stored metadata, intraday and daily data for one security.

    Returns early, after printing a message, when
    ``METADATA.blacklist(sec_name)`` is truthy or when
    ``METADATA.current_through(sec_name)`` is already at or past
    ``through_date``.  Otherwise it writes the instrument metadata blob,
    builds a ``(start_period, end_period, start_datetime, end_datetime)``
    window for each relevant contract, writes intraday data (when
    ``DO_INTRADAY`` and ``check_intraday(sec_name)`` allow it), writes the
    concatenated daily close prices (when ``DO_DAILY`` is set), and finally
    records ``through_date`` as the new "current through" mark if both
    ``DO_DAILY`` and ``DO_INTRADAY`` are enabled.

    :param sec_name: security name handed to ``isecurity.factory``.
    :param through_date: date the data should be current through; compared
        against ``METADATA.current_through(sec_name)``.
    :return: None.
    """
    if METADATA.blacklist(sec_name):
        print("Skipping %s because it is blacklisted" % sec_name)
        return

    current_through = METADATA.current_through(sec_name)
    if current_through and (current_through >= through_date):
        print("Skipping %s because it is already current through %s" % (sec_name, current_through))
        return

    print("Processing %s" % sec_name)
    millis = current_millis()
    instrument = isecurity.factory(sec_name)

    # Take a plain-dict snapshot of the instrument metadata and persist it.
    metadata = instrument.metadata
    md_dict = {key: metadata[key] for key in metadata.keys()}
    write_blob(sec_name, None, price.PriceData.METADATA, md_dict)

    # Create a frame from the expiry map, which will have contract names as the index
    df = pandas.Series(md_dict['expiry_map'])
    df = df.map(lambda x: pandas.Period(x, freq="B"))
    df = df.to_frame('expiration')
    df.index.name = 'contract'

    # Add a column for the inception dates
    # NOTE: Keeping this in case we want to write all contract prices as separate blobs in the future
    inceptions = md_dict.get('inception_map', None)
    if inceptions is not None:
        contract_inception = pandas.Series(inceptions)
        contract_inception = contract_inception.map(lambda x: pandas.Period(x, freq="B"))
        df.loc[:, 'inception'] = contract_inception
    else:
        df.loc[:, 'inception'] = None

    # If doing an update, find the threshold that a contract needs to expire on or after to be relevant
    threshold_p = BACKTEST_INDEX[0]  # type: pandas.Period
    if current_through:
        # Today minus the security's correction days, as a business-day period.
        correction_p = (pandas.Period(datetime.date.today(), freq='D') -
                        METADATA.correction_days(sec_name)).asfreq('B')
        current_through_p = pandas.Period(current_through, freq='B')
        threshold_p = max(threshold_p, min(correction_p, current_through_p))

    # Gather start/end stats for all relevant contracts
    contracts_info = OrderedDict()
    for contract_name in df.index:
        if contract_name in BAD_CONTRACTS:
            continue
        # Recorded before the threshold filter, so skipped contracts are
        # still mapped to their security.
        METADATA.contract_map[contract_name] = sec_name
        expiry = df.loc[contract_name, 'expiration']

        # Discard contracts that expire before the beginning of the period
        end_p = indaux.apply_offset(expiry, offsets.BDay())
        if end_p < threshold_p:
            continue

        # NOTE: Use inception - 1 BDay if wanting to store all contract data in the future.
        # For now, we're just storing the last bdays_per_contract business days of prices.
        start_p = max(BACKTEST_INDEX[0],
                      indaux.apply_offset(expiry, -METADATA.bdays_per_contract(sec_name) * offsets.BDay()))

        # Convert to naive datetimes (for efficient use elsewhere).
        # Timestamp.to_pydatetime() replaces the deprecated Timestamp.to_datetime().
        start_dt = start_p.to_timestamp().to_pydatetime()
        end_dt = end_p.to_timestamp().to_pydatetime()
        contracts_info[contract_name] = (start_p, end_p, start_dt, end_dt)

    if DO_INTRADAY and check_intraday(sec_name):
        write_intraday_data(sec_name, contracts_info, current_through)

    if DO_DAILY:
        # Gather the security's close prices one contract at a time, to be
        # concatenated below.  .items() works on both Python 2 and 3.
        close_dfs = []
        for contract_name, (_start_p, _end_p, start_dt, end_dt) in contracts_info.items():
            contract_close_df = get_close_prices(contract_name, start_dt, end_dt)
            if contract_close_df is not None:
                close_dfs.append(contract_close_df)

        if close_dfs:
            security_close_df = pandas.concat(close_dfs)
            # noinspection PyTypeChecker
            write_daily_data(sec_name, security_close_df, current_through)

    # Only indicate that we are "current" if neither daily or intraday were shut off
    if DO_DAILY and DO_INTRADAY:
        METADATA.current_through(sec_name, through_date)

    log_millis(millis, "Total time: ")