def get_xsd():
    """Download the nominal yield-curve XSD zip from treasury.gov and extract it.

    Fetches REMOTE_XSD_LOCATION, writes the payload to a timestamped zip under
    LOCAL_NOMINAL_YIELD_CURVE_XSD_ZIP_LOC, then extracts its contents into
    LOCAL_NOMINAL_YIELD_CURVE_XSD_LOC.
    """
    # GHAZY MAHJUB: 5/1/2020: Could not get urllib3 to work, getting
    # certificate errors with pyopenssl. Reverted to using requests library.
    # (Dead commented-out urllib3 code removed.)
    response = requests.get(REMOTE_XSD_LOCATION)
    LOGGER.info(
        "daily_nominal_yeild_curve.get_xsd(): HTTP RESPONSE STATUS CODE " +
        str(response.status_code))
    zip_content = response.content
    zip_timestamp = datetime.now().isoformat()
    local_nominal_yield_curve_xsd_zip_loc_full_path = OSMuxImpl.get_proper_path(
        LOCAL_NOMINAL_YIELD_CURVE_XSD_ZIP_LOC)
    # Build the timestamped zip path once instead of repeating the
    # concatenation at each use site.
    zip_path = (local_nominal_yield_curve_xsd_zip_loc_full_path +
                "DailyTreasuryYieldCurveRateData" + str(zip_timestamp) + ".zip")
    with open(zip_path, 'wb') as f:
        f.write(zip_content)
    local_nominal_yield_curve_xsd_loc = OSMuxImpl.get_proper_path(
        LOCAL_NOMINAL_YIELD_CURVE_XSD_LOC)
    LOGGER.info(
        "nominal_yield_curve.get_xsd(): extracting zip file into directory %s",
        local_nominal_yield_curve_xsd_loc)
    # Use ZipFile as a context manager so the handle is closed even if
    # extractall() raises (the original leaked it on error).
    with ZipFile(zip_path) as xsd_zip_file:
        xsd_zip_file.extractall(path=local_nominal_yield_curve_xsd_loc)
def get_xsd():
    """Download the real yield-curve XSD zip from treasury.gov and extract it.

    Fetches REMOTE_XSD_LOCATION, writes the payload to a timestamped zip under
    LOCAL_REAL_YIELD_CURVE_XSD_ZIP_LOC, then extracts its contents under
    LOCAL_REAL_YIELD_CURVE_XSD_LOC.
    """
    response = requests.get(REMOTE_XSD_LOCATION)
    LOGGER.info(
        "daily_real_yield_curve.get_xsd(): HTTP RESPONSE STATUS CODE " +
        str(response.status_code))
    zip_content = response.content
    zip_timestamp = datetime.now().isoformat()
    local_real_yield_curve_xsd_zip_loc_full_path = OSMuxImpl.get_proper_path(
        LOCAL_REAL_YIELD_CURVE_XSD_ZIP_LOC)
    # Build the timestamped zip path once instead of repeating the
    # concatenation at each use site.
    zip_path = (local_real_yield_curve_xsd_zip_loc_full_path +
                "DailyTreasuryRealYieldCurveRateData" + str(zip_timestamp) +
                ".zip")
    with open(zip_path, 'wb') as f:
        f.write(zip_content)
    local_real_yield_curve_xsd_loc = OSMuxImpl.get_proper_path(
        LOCAL_REAL_YIELD_CURVE_XSD_LOC)
    LOGGER.info(
        "real_yield_curve.get_xsd(): extracting zip file into directory %s",
        local_real_yield_curve_xsd_loc)
    # NOTE(review): the extract target appends "DailyTreasuryRealYieldCurveRateData.xsd/"
    # to the directory, creating a directory literally named with a .xsd
    # suffix — the nominal-curve variant extracts into the plain directory;
    # confirm this asymmetry is intentional.
    # Use ZipFile as a context manager so the handle is closed even if
    # extractall() raises (the original leaked it on error).
    with ZipFile(zip_path) as xsd_zip_file:
        xsd_zip_file.extractall(path=local_real_yield_curve_xsd_loc +
                                "DailyTreasuryRealYieldCurveRateData.xsd/")
def cboe_selenium_connect():
    """Log in to cboe.com via Selenium/Chrome and collect each download in url_dict.

    For every url_dict key (in sorted order) the browser is pointed at the
    download URL; after a fixed wait, the downloaded file is moved from
    DOWNLOAD_DATA_DIR into LOCAL_CBOE_DATA_DIR. The driver is always quit,
    even if an exception escapes the loop.
    """
    import time  # hoisted to the top of the function instead of mid-body
    # SECURITY NOTE(review): login credentials (email + password) are
    # hard-coded in source. Move them to SecureKeysAccess / environment
    # variables and rotate the exposed password.
    driver = webdriver.Chrome('/Users/ghazymahjub/chromedriver/chromedriver')
    try:
        driver.get(CBOE_LOGIN_URL)
        driver.find_element_by_id("ContentTop_C022_emailOrUserId").send_keys("*****@*****.**")
        driver.find_element_by_id("ContentTop_C022_Password").send_keys("8adxQBeFF$d!$qp")
        driver.find_element_by_id("ContentTop_C022_btnLogin").click()
        driver.implicitly_wait(15)
        for url_key in sorted(set(url_dict.keys())):
            # Fixed sleeps give the site time to serve the file download.
            LOGGER.info("cboe.cboe_selenium_connect(): sleep for 15 seconds, zzzzzzzzz....")
            time.sleep(15)
            LOGGER.info("cboe.cboe_selenium_connect(): woke up!")
            LOGGER.info("cboe.cboe_selenium_connect(): getting %s", url_dict[url_key][0])
            driver.get(url_dict[url_key][0])
            LOGGER.info("cboe.cboe_selenium_connect(): sleep for 15 seconds, zzzzzzzzz....")
            time.sleep(15)
            LOGGER.info("cboe.cboe_selenium_connect(): woke up!")
            downloaded_file_path = OSMuxImpl.get_proper_path(DOWNLOAD_DATA_DIR) + url_dict[url_key][1]
            move_to_file_path = OSMuxImpl.get_proper_path(LOCAL_CBOE_DATA_DIR) + url_dict[url_key][1]
            try:
                shutil.move(downloaded_file_path, move_to_file_path)
            except FileNotFoundError as fnfe:
                LOGGER.error("cboe.cboe_selenium_connect(): shutil move failed with... %s from file: "
                             "%s, to file: %s", fnfe.__str__(), downloaded_file_path,
                             move_to_file_path)
    finally:
        # Robustness fix: quit the browser even when an exception escapes the
        # loop — the original leaked the Chrome process on error.
        driver.quit()
    return
def __init__(self):
    """Set up FRED API access and the local working directories.

    Resolves the FRED API key via SecureKeysAccess and prepares the fred data
    and seaborn plot output paths.
    """
    self.logger = get_logger()
    self.source = 'fred'
    self.api_key = SecureKeysAccess.get_vendor_api_key_static(
        vendor=self.source.upper())
    self.fred_pwd = OSMuxImpl.get_proper_path('/workspace/data/fred/')
    self.seaborn_plots_pwd = OSMuxImpl.get_proper_path(
        '/workspace/data/seaborn/plots/')
    # API client constructed last, once the key has been resolved.
    self.fred = Fred(api_key=self.api_key)
def __init__(self, sec_type_list=None):
    """IEX trading reference-data accessor.

    :param sec_type_list: IEX security types to include; defaults to
        ['cs', 'et'] (common stock and ETF). Bug fix: the original used a
        mutable default list, which is shared across all calls; a None
        sentinel preserves the same effective default safely.
    """
    if sec_type_list is None:
        sec_type_list = ['cs', 'et']
    self.logger = get_logger()
    self.iex_trading_root_url = "https://api.iextrading.com/1.0"
    self.get_symbols_universe_url = "/ref-data/symbols"
    self.sec_type_list = sec_type_list
    # NOTE(review): attribute name has a typo ("indusry"); left unchanged in
    # case callers elsewhere reference it.
    self.master_sector_indusry_df = pd.DataFrame(
        columns=['Sector', 'Industry'])
    self.master_sector_industry_file = OSMuxImpl.get_proper_path(
        '/workspace/data/IEX/') + 'master_sector_industry.csv'
    self.iex_html_path = OSMuxImpl.get_proper_path(
        '/workspace/data/IEX/html/')
    self.co_earnings_path = OSMuxImpl.get_proper_path(
        '/workspace/data/IEX/earnings/')
def __init__(self,
             stock_universe_filename='Russ3K_holdings',
             use_iex_trading_symbol_universe=False,
             sec_type_list=None,
             daily_window_sizes=None,
             weekly_window_sizes=None,
             monthly_window_sizes=None):
    """Stock-universe analytics object.

    :param stock_universe_filename: base name of the holdings file to load.
    :param use_iex_trading_symbol_universe: use IEX symbols instead of the file.
    :param sec_type_list: security types; defaults to ['cs', 'et'].
    :param daily_window_sizes: rolling windows (days); defaults below.
    :param weekly_window_sizes: rolling windows (weeks); defaults below.
    :param monthly_window_sizes: rolling windows (months); defaults below.

    Bug fix: the four list parameters previously used mutable default
    arguments (one shared list per parameter across all instances); None
    sentinels preserve the same effective defaults safely.
    """
    if sec_type_list is None:
        sec_type_list = ['cs', 'et']
    if daily_window_sizes is None:
        daily_window_sizes = [30, 60, 90, 120, 180, 270]
    if weekly_window_sizes is None:
        weekly_window_sizes = [4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52]
    if monthly_window_sizes is None:
        monthly_window_sizes = [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36]
    self.logger = get_logger()
    self.sm = StatisticalMoments()
    self.stock_universe_data = None
    self.stock_universe_filename = stock_universe_filename
    self.use_iex_trading_symbol_universe = use_iex_trading_symbol_universe
    self.sec_type_list = sec_type_list
    self.plotly_histograms_dir = OSMuxImpl.get_proper_path(
        '/workspace/data/plotly/histograms/')
    self.daily_window_sizes = daily_window_sizes
    self.weekly_window_sizes = weekly_window_sizes
    self.window_size_dict = {
        'D': daily_window_sizes,
        'W': weekly_window_sizes,
        'M': monthly_window_sizes
    }
    # Additional indices intentionally disabled:
    # 'QQQ': 'NQ100', 'IWM': 'Russell2000', 'DIA': 'Dow30'
    self.index_ticker_dict = {'SPY': 'SP500'}
    self.index_ticker_df = pd.DataFrame(
        data=list(self.index_ticker_dict.items()),
        columns=['Symbol', 'Name'])
def read_historical_voltermstruct_csv(eod_run=True, which_product=None):
    """Load historical vol-term-structure CSVs, clean them, and insert into the DB.

    When *eod_run* is True (the default) only the trailing 7 calendar days of
    each file are inserted — a fast daily end-of-day update. When False the
    entire history is inserted.

    :param which_product: restrict the run to one product key from url_dict;
        None processes every product.
    :param eod_run: True for the daily subset insert, False for a full
        historical load.
    :return: None
    """
    products = [which_product] if which_product is not None else url_dict.keys()
    for vol_prod in products:
        csv_path = OSMuxImpl.get_proper_path(LOCAL_CBOE_DATA_DIR) + url_dict[vol_prod][1]
        LOGGER.info("cboe.read_historical_voltermstructure_csv(): loading csv file %s into dataframe",
                    csv_path)
        frame = pd.read_csv(csv_path, skiprows=3, parse_dates=True,
                            index_col=0, header=None)
        frame.columns = ['Volume', 'd_Open', 'd_High', 'd_Low', 'd_Close']
        frame.index.name = 'Date'
        frame.drop(['Volume'], axis=1, inplace=True)
        # Persist the cleaned copy alongside the raw download.
        frame.to_csv(csv_path + ".clean")
        # store into database table
        if eod_run:
            # EOD run: only the last 7 calendar days need inserting.
            newest_ts = frame.index[-1].to_pydatetime()
            window_start = newest_ts - timedelta(days=7)
            rows_to_insert = frame.loc[str(window_start):]
        else:
            rows_to_insert = frame
        vix_term_structure_to_db(rows_to_insert, symbol_to_insert=vol_prod)
def pyparse_xml(xml_file):
    """Parse a treasury.gov yield-curve XML file into a DataFrame.

    Validates *xml_file* against the local nominal yield-curve XSD, converts
    it to a dict via xmlschema, then flattens each <entry>'s m:properties
    into one row per rate date.

    :param xml_file: path to the downloaded yield-curve XML feed.
    :return: DataFrame indexed by rate date ('d:NEW_DATE' '$' value), one
        column per d:* property; each cell is a small dict of that
        property's sub-keys.
    """
    xsd_file = OSMuxImpl.get_proper_path(LOCAL_NOMINAL_YIELD_CURVE_XSD_LOC) + XSD_ZIP_DIR_PATH + \
        LOCAL_NOMINAL_YIELD_CURVE_XSD
    LOGGER.info("nominal_yield_curve.parse_xml(): Using xsd schema at %s ",
                xsd_file)
    try:
        schema = xmlschema.XMLSchema(xsd_file, validation="strict")
        schema.validate(xml_file)
        if not schema.is_valid(xml_file):
            LOGGER.error(
                "nominal_yield_curve.parse_xml(): %s xml file is NOT validate against schema %s !",
                xml_file, xsd_file)
            LOGGER.error(
                "nominal_yield_curve.parse_xml(): is today a weekend day? empty xml files on weekends, check %s",
                xml_file)
        else:
            LOGGER.info(
                "nominal_yield_curve.parse_xml(): %s xml file is valid against schema %s ! ",
                xml_file, xsd_file)
    except XMLSchemaValidationError:
        # Validation failures are logged and parsing continues below.
        LOGGER.error("nominal_yield_curve.pyparse_xml(): validation failed!")
    except XMLSchemaParseError:
        # NOTE(review): if XMLSchema() itself raised, `schema` is unbound and
        # the schema.to_dict() call below will raise NameError — confirm
        # whether a parse error here should abort instead.
        LOGGER.error(
            "nominal_yield_curve.pyparse_xml(): xmlschemaparse error!")
    # NOTE(review): xt/root and top_level_keys are computed but never used —
    # the ElementTree parse may be a leftover validation step.
    xt = ElementTree.parse(xml_file)
    root = xt.getroot()
    yc_xml_to_dict = schema.to_dict(xml_file)
    top_level_keys = yc_xml_to_dict.keys()
    entry_yc_xml_list = yc_xml_to_dict['entry']
    return_dict = {}
    for entry_dict in entry_yc_xml_list:
        content_dict = entry_dict['content']
        content_meta_properties = content_dict['m:properties']
        # The entry's date keys the row in the output dict.
        date_of_rate_data = content_meta_properties['d:NEW_DATE']
        sub_return_dict = {}
        for cmp_key in sorted(set(content_meta_properties.keys())):
            act_data_dict = content_meta_properties[cmp_key]
            for add_key in sorted(set(act_data_dict.keys())):
                act_data = act_data_dict[add_key]
                # there is an @m:type key, which is the Entity Data Model data type
                # for example: the d:NEW_DATE key is @m:type Edm.DateTime
                # for example: the d:BC_7EAR key is @m:type is Edm.Double
                # then there is a '$' key, which is the actual data.
                # print("cmp_key", cmp_key)
                # print("add_key", add_key)
                # print("act_data", act_data)
                if sub_return_dict.get(cmp_key, None) is None:
                    value_dict = {add_key: act_data}
                    sub_return_dict[cmp_key] = value_dict
                else:
                    value_dict = sub_return_dict.get(cmp_key)
                    value_dict[add_key] = act_data
                    # we don't need the below since it's a pointer.
                    sub_return_dict[cmp_key] = value_dict
        return_dict[date_of_rate_data['$']] = sub_return_dict
    df = pd.DataFrame.from_dict(data=return_dict, orient='index')
    return df
def run_month_insert(month=0, year=0):
    """Fetch one month of nominal yield-curve XML from treasury.gov and upsert rows.

    Downloads the month's feed, fixes/validates the XML, sftp's it to the
    mysql server, parses it into a DataFrame, then inserts or updates one
    DailyNominalUsTyGovYieldCurve row per trading day.

    :param month: calendar month to load; 0 (the default) means the current month.
    :param year: calendar year to load; 0 (the default) means the current year.
    :return: None (returns early when the feed is empty, e.g. on weekends).
    """
    # Bug fix: `is 0` tests object identity, not value (and emits a
    # SyntaxWarning on modern CPython); value equality is what is intended.
    if month == 0:
        month = datetime.now().month
    if year == 0:
        year = datetime.now().year
    MONTH_YIELD_CURVE_URL = "https://data.treasury.gov/feed.svc/DailyTreasuryYieldCurveRateData?" \
                            "$filter=month(NEW_DATE)%20eq%20" + str(month) + \
                            "%20and%20year(NEW_DATE)%20eq%20" + str(year)
    url_response = requests.get(url=MONTH_YIELD_CURVE_URL)
    LOGGER.info(
        "daily_nominal_yeild_curve.write_response_file(): HTTP RESPONSE STATUS CODE "
        + str(url_response.status_code))
    month_tydotgov_nominal_yields_xml_file = (
        OSMuxImpl.get_proper_path(LOCAL_NOMINAL_YIELD_CURVE_XML_LOC) +
        "DailyTreasuryYieldCurveRateData" + date(year, month, 1).isoformat() +
        ".xml")
    with open(month_tydotgov_nominal_yields_xml_file, 'wb') as f:
        f.write(url_response.content)
    # check empty xml
    entry_count = check_empty_xml(month_tydotgov_nominal_yields_xml_file)
    if entry_count == 0:  # bug fix: was `is 0` (identity comparison)
        LOGGER.error(
            "nominal_yield_curve:run_daily_insert(): the xml file %s is empty, nothing to insert",
            month_tydotgov_nominal_yields_xml_file)
        return
    fix_invalid_xml(month_tydotgov_nominal_yields_xml_file)
    LOGGER.info(
        "nominal_yield_curve.daily_insert(): ftp'ing into mysql server xml file %s",
        month_tydotgov_nominal_yields_xml_file)
    sftp_xml(month_tydotgov_nominal_yields_xml_file)
    LOGGER.info(
        "nominal_yield_curve.daily_insert(): running parse_xml() on %s ",
        month_tydotgov_nominal_yields_xml_file)
    df_daily = pyparse_xml(month_tydotgov_nominal_yields_xml_file)
    yc_entries = df_daily.apply(vectorized_insert_from_pyparse, axis=1)
    session = get_db_session()
    # Removed two unused imports (sqlalchemy/pymysql IntegrityError) — neither
    # name was referenced anywhere in the function.
    session.flush()
    # Bug fix: Series.iteritems() was removed in pandas 2.0; items() is the
    # long-standing equivalent.
    for yc_entry in yc_entries.items():
        LOGGER.info("nominal_yield_curve:inserting yield curve entry %s %s",
                    yc_entry[0], yc_entry[1])
        yc_entry_db = session.query(DailyNominalUsTyGovYieldCurve).filter_by(
            Id=yc_entry[1].Id).first()
        if yc_entry_db is None:
            LOGGER.info(
                "nominial_yield_curve.run_month_insert(%s,%s): entries for date %s do not exist, inserting...",
                str(year), str(month), yc_entry[1].NEW_DATE)
            session.add(yc_entry[1])
        else:
            LOGGER.info(
                "nominial_yield_curve.run_month_insert(%s,%s): entries for date %s do exist, updating...",
                str(year), str(month), yc_entry[1].NEW_DATE)
            yc_entry_db.set_all(yc_entry[1])
    session.commit()
def __init__(self, start_date, end_date, symbols):
    """Yahoo price-data object for *symbols* between *start_date* and *end_date*.

    Prepares one lazy downloader per price field (each hits Yahoo through
    pandas-datareader) plus the local CSV cache file names.
    """
    super().__init__()
    self.logger = get_logger()
    self.start_date = start_date
    self.end_date = end_date
    self.symbols = symbols
    # One downloader lambda per field; evaluation is deferred until called.
    self.get_px_adj_close = lambda ticker: web.DataReader(
        ticker, 'yahoo', start=self.start_date, end=self.end_date)['Adj Close']
    self.get_px_open = lambda ticker: web.DataReader(
        ticker, 'yahoo', start=self.start_date, end=self.end_date)['Open']
    self.get_px_high = lambda ticker: web.DataReader(
        ticker, 'yahoo', start=self.start_date, end=self.end_date)['High']
    self.get_px_low = lambda ticker: web.DataReader(
        ticker, 'yahoo', start=self.start_date, end=self.end_date)['Low']
    self.get_volume = lambda ticker: web.DataReader(
        ticker, 'yahoo', start=self.start_date, end=self.end_date)['Volume']
    self.get_px_all = lambda ticker: web.DataReader(
        ticker, 'yahoo', start=self.start_date, end=self.end_date)
    self.all_px_df = None
    self.adj_close_px_df = None
    self.open_px_df = None
    self.high_px_df = None
    self.low_px_df = None
    self.volume_df = None
    self.local_yahoo_data_path = '/workspace/data/yahoo/'
    self.local_file_type = '.csv'
    self.local_data_file_pwd = OSMuxImpl.get_proper_path(
        self.local_yahoo_data_path)
    # Cache file names share a common symbol tag: e.g. "SPY_QQQ_AdjClose.csv".
    symbol_tag = '_'.join(self.symbols)
    self.local_adj_close_file_name = symbol_tag + '_AdjClose' + self.local_file_type
    self.local_open_file_name = symbol_tag + '_Open' + self.local_file_type
    self.local_high_file_name = symbol_tag + '_High' + self.local_file_type
    self.local_low_file_name = symbol_tag + '_Low' + self.local_file_type
    self.local_volume_file_name = symbol_tag + '_Volume' + self.local_file_type
    self.local_all_file_name = symbol_tag + self.local_file_type
    self.logger.info("YahooDataObject.__init__.local_data_file_pwd: %s",
                     str(self.local_data_file_pwd))
    # SECURITY NOTE(review): hard-coded OAuth client id/secret checked into
    # source — move these to SecureKeysAccess and rotate the secret.
    self.yahoo_client_id = "dj0yJmk9Q09ZdnVWMlNEdzdxJmQ9WVdrOWNFTnNRMFV3TkRRbWNHbzlNQS0tJnM9Y29uc3VtZXJzZWNyZXQmeD1mNA--"
    self.yahoo_client_secret = "41f7939217e13b297ce3862be55c5b9e4b77cab8"
def download_quandl_stock_csv_file(self, idx):
    """Download the Quandl EOD CSV for ticker *idx* into the local stock data dir.

    :param idx: ticker symbol; used in both the Quandl URL and the local
        filename (<local_stock_data_path>/<idx>.csv).
    """
    url = ("https://www.quandl.com/api/v3/datasets/EOD/" + idx +
           ".csv?api_key=" + self.quandl_auth_token)
    response = requests.get(url)
    local_data_file_pwd = OSMuxImpl.get_proper_path(
        self.local_stock_data_path)
    total_local_file_name = local_data_file_pwd + idx + ".csv"
    # write out the response to file
    with open(total_local_file_name, 'wb') as f:
        f.write(response.content)
    # Bug fix: the original concatenated str(status_code) onto a format
    # string still containing a literal %s, so the placeholder was never
    # substituted and the message ended with "%s <code>". Pass the status
    # code as a lazy logging argument instead.
    self.logger.info(
        "QuandlSymbolInterface.download_quandl_stock_csv_file(): HTTP Response Status Code %s",
        response.status_code)
def prepare_heritability_case_study_data(self):
    """Load the finch beak-depth heredity CSVs and return numpy arrays.

    Reads the scandens and fortis heredity files from the datacamp data
    directory; the fortis parent value is the mean of the male and female
    beak depths.

    :return: tuple (bd_offspring_scandens, bd_parent_scandens,
        bd_offspring_fortis, bd_parent_fortis) of numpy arrays.
    """
    data_dir = OSMuxImpl.get_proper_path(
        user_provided_path='/workspace/data/datacamp/')
    scandens_frame = pd.read_csv(data_dir + "scandens_beak_depth_heredity.csv",
                                 sep=",", header=0)
    fortis_frame = pd.read_csv(data_dir + "fortis_beak_depth_heredity.csv",
                               sep=",", header=0)
    offspring_scandens = scandens_frame['mid_offspring'].values
    parent_scandens = scandens_frame['mid_parent'].values
    offspring_fortis = fortis_frame['Mid-offspr'].values
    # fortis parents: average of the two parents' beak depths.
    parent_fortis = (fortis_frame['Male BD'].values +
                     fortis_frame['Female BD'].values) / 2.0
    return (offspring_scandens, parent_scandens,
            offspring_fortis, parent_fortis)
def check_valid_xml(xml_file, xsd_file):
    """Validate *xml_file* against *xsd_file* with lxml, exiting on failure.

    Parses the schema and document, then asserts schema validity. On syntax
    or schema errors the lxml error log is written to
    LOCAL_XML_PARSE_ERROR_LOG and the interpreter exits via quit()
    (behavior preserved from the original design).

    :param xml_file: path to the XML document to check.
    :param xsd_file: path to the XSD schema to validate against.
    """
    error_syntax_log = OSMuxImpl.get_proper_path(LOCAL_XML_PARSE_ERROR_LOG)
    with open(xsd_file, 'r') as schema_file:
        schema_to_check = schema_file.read()
    # open and read xml file
    with open(xml_file, 'r') as xml_f:
        xml_to_check = xml_f.read()
    xmlschema_doc = etree.parse(StringIO(schema_to_check))
    xml_schema = etree.XMLSchema(xmlschema_doc)
    # parse xml
    try:
        doc = etree.parse(StringIO(xml_to_check))
        LOGGER.info(
            "nominal_yield_curve.check_valid_xml(): all is well so far, parsed successfully"
            " xml file %s", xml_file)
    # check for file IO error
    except IOError as io_error:
        # NOTE(review): this path does not quit, so `doc` is unbound when
        # assertValid(doc) runs below — confirm intended handling.
        LOGGER.error(
            "nominal_yield_curve.check_valid_xml(): invalid file, exception is %s",
            str(io_error))
    # check for XML syntax errors
    except etree.XMLSyntaxError as err:
        LOGGER.error(
            "nominal_yield_curve.check_xml_valid(): schema validation error %s, see error_schema.log!",
            str(err))
        with open(error_syntax_log, 'w') as error_log_file:
            error_log_file.write(str(err.error_log))
        quit()
    # Bug fix: was a bare `except:`, which also swallows SystemExit and
    # KeyboardInterrupt; narrowed to Exception.
    except Exception:
        LOGGER.error(
            "nominal_yield_curve.check_xml_valid(): unknown error, quitting..."
        )
        quit()
    # validate against schema
    try:
        xml_schema.assertValid(doc)
        LOGGER.info(
            "nominal_yield_curve.check_xml_valid(): XML Valid, schema validation ok %s %s",
            xsd_file, xml_file)
    except etree.DocumentInvalid as err:
        LOGGER.error(
            "nominal_yield_curve.check_xml_valid(): schema validation error %s %s, see error_schema.log!",
            xsd_file, xml_file)
        with open(error_syntax_log, 'w') as error_log_file:
            error_log_file.write(str(err.error_log))
        quit()
    # Bug fix: narrowed from a bare `except:` (see above).
    except Exception:
        LOGGER.error(
            "nominal_yield_curve.check_xml_valid(): unknown error, quitting..."
        )
        quit()
def __init__(self, tickers, writer_filename=None):
    """SimFin data accessor for *tickers*, optionally writing output to a file.

    :param tickers: tickers to resolve to SimFin ids.
    :param writer_filename: when given, a writer is opened at
        <simfin data dir>/<writer_filename>; otherwise no writer is created.
    """
    self.tickers = tickers
    self.writer_filename = writer_filename
    self.logger = get_logger()
    self.source = 'simfin'
    self.api_key = SecureKeysAccess.get_vendor_api_key_static(
        vendor=self.source.upper())
    self.simfin_pwd = OSMuxImpl.get_proper_path('/workspace/data/simfin/')
    # Only create a writer when a target filename was supplied.
    self.writer = (self.get_writer(self.simfin_pwd + writer_filename)
                   if writer_filename is not None else None)
    self.sim_ids = self.get_sim_ids(tickers)
def validate_show_document(html_document, html_filename, html_dir, viewHtml=False):
    """Validate a Bokeh document, write it to disk as HTML, optionally open it.

    :param html_document: Bokeh Document to validate and serialize.
    :param html_filename: output file name.
    :param html_dir: directory token, resolved through OSMuxImpl.
    :param viewHtml: open the written file in a browser unless exactly False.
    """
    html_document.validate()
    target_dir = OSMuxImpl.get_proper_path(html_dir)
    target_path = target_dir + html_filename
    rendered = file_html(html_document, INLINE, "Data Tables")
    with open(target_path, "w", encoding='utf-8') as out_file:
        out_file.write(rendered)
    LOGGER.info(
        "extend_bokeh_datatables.ExtendBokeh.validate_show_document(): wrote %s in dir %s ",
        html_filename, target_dir)
    # Any value other than the literal False (including None) triggers view.
    if viewHtml is not False:
        view(target_path)
def prepare_finch_beak_case_study_data(self):
    """Load the 1975 and 2012 finch beak measurement CSVs as numpy arrays.

    Reads both survey files from the datacamp data directory and returns the
    beak depth and length columns for each year.

    :return: tuple (bd_1975, bd_2012, bl_1975, bl_2012) of numpy arrays.
    """
    data_dir = OSMuxImpl.get_proper_path(
        user_provided_path='/workspace/data/datacamp/')
    frame_1975 = pd.read_csv(data_dir + "finch_beaks_1975.csv",
                             sep=',', header=0)
    frame_2012 = pd.read_csv(data_dir + "finch_beaks_2012.csv",
                             sep=',', header=0)
    depth_1975 = frame_1975['bdepth'].values
    length_1975 = frame_1975['blength'].values
    depth_2012 = frame_2012['Beak depth, mm'].values
    length_2012 = frame_2012['Beak length, mm'].values
    return (depth_1975, depth_2012, length_1975, length_2012)
def validate_show_document(html_document, html_filename, html_dir, viewHtml=False):
    """Validate a Bokeh document, write it to disk as HTML, optionally open it.

    :param html_document: Bokeh Document to validate and serialize.
    :param html_filename: output file name.
    :param html_dir: directory token, resolved through OSMuxImpl.
    :param viewHtml: open the written file in a browser unless exactly False.
    """
    from bokeh.embed import file_html
    from bokeh.resources import INLINE
    from bokeh.util.browser import view
    # Bug fix: removed the leftover debug print() that dumped the entire
    # rendered HTML document to stdout on every call.
    html_document.validate()
    proper_dir = OSMuxImpl.get_proper_path(html_dir)
    proper_filename = proper_dir + html_filename
    # Consistency/bug fix: write with an explicit utf-8 encoding (matching
    # the other validate_show_document variant in this codebase) so
    # non-ASCII HTML does not fail on platforms with a non-UTF-8 default.
    with open(proper_filename, "w", encoding='utf-8') as f:
        f.write(file_html(html_document, INLINE, "Data Tables"))
    LOGGER.info(
        "extend_bokeh_datatables.ExtendBokeh.validate_show_document(): wrote %s in dir %s ",
        html_filename, proper_dir)
    if viewHtml is not False:
        view(proper_filename)
def screenscrape_daily_volvals():
    """Scrape the daily summary values for each CBOE vol product and store them.

    For every product in cboe_dashboard_urls_dict, scrapes the 'div-summary'
    section of the dashboard page (h5 tags are field names, span tags are
    values), writes a VixTermStructure row to the database, and finally dumps
    the combined values to a dated CSV.

    :return: DataFrame with one column per product, rows keyed by scraped
        field names (plus 'AsOfDate').
    """
    return_dict = {}
    as_of_date = None
    session = get_db_session()
    for vol_prod in cboe_dashboard_urls_dict.keys():
        html_text = requests.get(cboe_dashboard_urls_dict[vol_prod]).text
        b_soup = BeautifulSoup(html_text, 'html.parser')
        attrs = {'id': 'div-summary'}
        keys_list = []
        values_list = []
        # h5 elements hold the field labels, span elements the values, in
        # matching document order.
        for div_elem in b_soup.find_all('div', attrs=attrs):
            for sub_elem in div_elem.find_all('h5'):
                keys_list.append(sub_elem.text)
            for sub_elem in div_elem.find_all('span'):
                values_list.append(sub_elem.text)
        # The last span is the "As of <timestamp>" banner.
        as_of_date = values_list[-1]
        as_of_date = as_of_date.split("As of ")[1]
        vol_prod_dict = dict(zip(keys_list, values_list))
        # Collapse runs of whitespace in the timestamp.
        # NOTE(review): pattern should be a raw string (r'\s+') to avoid a
        # DeprecationWarning on modern Python.
        vol_prod_dict['AsOfDate'] = re.sub('\s+', ' ', as_of_date).strip()
        as_of_date = vol_prod_dict['AsOfDate']
        as_of_date_dt = datetime.strptime(as_of_date,
                                          '%Y-%m-%d %H:%M:%S (ET)')
        as_of_date_date = as_of_date_dt.date()
        vts = VixTermStructure(Id=as_of_date_date,
                               Symbol=vol_prod,
                               Change=vol_prod_dict['Change'],
                               Open=float(vol_prod_dict['Open']),
                               High=float(vol_prod_dict['High']),
                               Low=float(vol_prod_dict['Low']),
                               Close=float(vol_prod_dict['Prev Close']),
                               LastSale=float(vol_prod_dict['Last Sale']),
                               LastTime=as_of_date_dt)
        vts_to_db(vts, session)
        return_dict[vol_prod] = vol_prod_dict
    df = pd.DataFrame(data=return_dict)
    # CSV filename uses the last product's as-of date string.
    daily_vals_csv_file = OSMuxImpl.get_proper_path(LOCAL_CBOE_DATA_DIR) + "daily_vol_vals_" + str(as_of_date) + ".csv"
    # NOTE(review): the tz-aware datetime built below is never used after
    # this point — possibly leftover; confirm before removing.
    as_of_date_dt = datetime.strptime(as_of_date, '%Y-%m-%d %H:%M:%S (ET)')
    as_of_date_dt = as_of_date_dt.replace(tzinfo=timezone("EST"))
    df.to_csv(daily_vals_csv_file)
    return df
def __init__(self, class_of_data, local_symbol, local_file_type):
    """Quandl data object: loads from the local CSV cache or from Quandl.

    :param class_of_data: dataset class keyed in QuandlSymbolInterface —
        one of 'FOREX', 'EURODOLLARS', 'INTEREST_RATES',
        'ECONOMIC_INDICATORS_UNADJ', 'ECONOMIC_INDICATORS_SEAS_ADJ',
        'FED_FORECASTS', 'MISC', 'STOCKS'.
    :param local_symbol: key into the quandl_interface static symbol
        dictionaries (the non-reversed key of the key/value pairs).
    :param local_file_type: local file extension, usually '.csv'.
    """
    super().__init__()
    # NOTE(review): self.logger is used below but not assigned here —
    # presumably set by the base class __init__; confirm.
    self.qsi = QuandlSymbolInterface()
    self.class_of_data = class_of_data
    self.local_symbol = local_symbol
    self.local_file_type = local_file_type
    self.quandl_symbol = self.qsi.get_quandl_symbol(
        class_of_data, local_symbol)
    self.local_data_file_pwd = OSMuxImpl.get_proper_path(
        self.qsi.get_local_quandl_data_path(class_of_data))
    self.logger.info("QuandlDataObject.__init__.local_data_file_pwd " +
                     self.local_data_file_pwd)
    self.local_data_file_name = local_symbol + self.local_file_type
    self.newest_date_at_quandl_dt = None
    self.last_time_file_modified = None
    # Idiom fix: `== False` replaced with `not ...` (equivalent for the
    # boolean returned by is_local_file_old()).
    if self.does_local_file_exist() and not self.is_local_file_old():
        self.df = self.get_df_from_local_csv()
    else:
        self.df = self.get_from_quandl()
def batch_historical_get(no_ftp=False):
    """Download the full historical nominal yield-curve XML feed.

    Writes the feed to a local file and, unless the file is empty or *no_ftp*
    is set, sftp's it to the mysql server.

    :param no_ftp: skip the sftp step when True (forced True if the feed is empty).
    :return: path to the local historical XML file.
    """
    # connect to treasury.gov xml feed.
    all_hist_tydotgov_nominal_yields_xml_url = (
        "https://data.treasury.gov/feed.svc/DailyTreasuryYieldCurveRateData")
    url_response = requests.get(url=all_hist_tydotgov_nominal_yields_xml_url)
    hist_tydotgov_nominal_yields_xml_file = (
        OSMuxImpl.get_proper_path(LOCAL_NOMINAL_YIELD_CURVE_XML_LOC) +
        "DailyTreasuryYieldCurveRateData-Historical.xml")
    # write out the response to file
    with open(hist_tydotgov_nominal_yields_xml_file, 'wb') as f:
        f.write(url_response.content)
    LOGGER.info(
        "historical_nominal_yield_curve.write_response_file(): HTTP Response Status Code "
        + str(url_response.status_code))
    entry_count = check_empty_xml(hist_tydotgov_nominal_yields_xml_file)
    # Bug fix: `is 0` tests identity, not value equality (and warns on
    # modern CPython); == is what is intended.
    if entry_count == 0:
        LOGGER.error(
            "nominal_yield_curve.batch_historical_get(): historical xml file is empty %s ",
            hist_tydotgov_nominal_yields_xml_file)
        no_ftp = True
    if not no_ftp:
        sftp_xml(hist_tydotgov_nominal_yields_xml_file)
    return hist_tydotgov_nominal_yields_xml_file
def get_historical_csv(key=None, url=None):
    """Download a historical CSV from cboe.com and save it locally.

    The server names the file via its Content-Disposition header; the body
    is streamed to LOCAL_CBOE_DATA_DIR in small chunks.

    :param key: unused here; retained for interface compatibility with callers.
    :param url: download URL; when None, the VIX1Y chart-download endpoint is
        used with its RequestSymbol payload.
    :return: the HTTP status code of the request.
    """
    if url is None:
        url = "https://www.cboe.com/chart/GetDownloadData/"
        payload = {'RequestSymbol': 'VIX1Y'}
    else:
        payload = None
    url_response = requests.get(url=url, params=payload)
    if url_response.status_code != 200:
        LOGGER.error("cboe.get_historical_csv(): HTTP RESPONSE STATUS CODE %s",
                     str(url_response.status_code))
        LOGGER.error("cboe.get_historical_csv(): url request failed %s", url)
        return url_response.status_code
    # Filename comes from the Content-Disposition attachment header.
    attachmentFilename = url_response.headers['Content-Disposition']
    csv_filename = attachmentFilename.split('=')[1].replace('"', '')
    LOGGER.info("cboe.get_historical_csv(): HTTP RESPONSE STATUS CODE " +
                str(url_response.status_code))
    historical_csv_file = OSMuxImpl.get_proper_path(LOCAL_CBOE_DATA_DIR) + csv_filename
    with open(historical_csv_file, 'wb') as f:
        for chunk in url_response.iter_content(chunk_size=128):
            f.write(chunk)
    # Bug fix: removed the redundant f.close() inside the with-block — the
    # context manager already closes the file.
    return url_response.status_code
def __init__(self, **kwargs):
    """Tiingo market-data object.

    Recognised keyword arguments: symbols, start_date, end_date, source.
    Each recognised kwarg overrides the corresponding default attribute and
    is logged; unknown kwargs are ignored.
    """
    self.logger = get_logger()
    self.file_mod_time_dict = {}
    self.symbols = None
    self.start_date = ""
    self.end_date = ""
    self.source = "tiingo"
    # Note: the API key is resolved from the default source ("tiingo")
    # before any kwargs are applied, matching the original ordering.
    self.api_key = SecureKeysAccess.get_vendor_api_key_static(
        vendor=str.upper(self.source))
    recognised = ('symbols', 'start_date', 'end_date', 'source')
    for key, value in kwargs.items():
        if key in recognised:
            setattr(self, key, value)
            self.logger.info("TiingoDataObject.__init__." + key + ": %s",
                             str(value))
    # Lazy downloader: full Tiingo history from 2010-01-01 through today.
    self.get_px_all_tiingo = lambda sym: web.get_data_tiingo(
        sym,
        start='2010-01-01',
        end=str(pd.to_datetime('today')).split(' ')[0],
        api_key=self.api_key)
    self.all_px_df = None
    self.local_tiingo_data_path = '/workspace/data/tiingo/stocks/'
    self.local_file_type = '.csv'
    self.local_data_file_pwd = OSMuxImpl.get_proper_path(
        self.local_tiingo_data_path)
def get_historical_vx_data_from_cboe(contract_expiry_date):
    """Fetch the CFE VX futures CSV for the contract expiring on *contract_expiry_date*.

    Skips the download when the contract has already expired and its file is
    already present locally. A 404 for a contract expiring more than ~180
    days out is treated as benign (the contract may not be listed yet).

    :param contract_expiry_date: expiry date of the VX contract.
    :return: HTTP status code from the download, or 200 when served from disk.
    """
    from os import path
    total_url = CBOE_FUTURES_BASE_URL + VX_CSV_REMOTE_DIR + str(contract_expiry_date)
    month_codes = list(MONTHLY_EXPIRY_MONTH_CODE_MAPPING.keys())
    month_numbers = list(MONTHLY_EXPIRY_MONTH_CODE_MAPPING.values())
    two_digit_year = datetime.strftime(contract_expiry_date, "%y")
    month_code = month_codes[month_numbers.index(contract_expiry_date.month)]
    # Local filename convention: CFE_<monthCode><yy>_VX.csv
    full_front_filename = "CFE_" + month_code + str(two_digit_year) + "_VX.csv"
    path_to_front_month_file = OSMuxImpl.get_proper_path(LOCAL_CBOE_DATA_DIR) + full_front_filename
    if contract_expiry_date < datetime.now().date() and path.exists(path_to_front_month_file):
        # This contract is expired already; we may have it in the flat file system.
        return 200
    LOGGER.info("cboe.get_historical_vx_data_from_cboe(%s): getting historical vx data from url %s...",
                contract_expiry_date, total_url)
    return_value = get_historical_csv(url=total_url)
    far_future_cutoff = datetime.now().date() + timedelta(days=180)
    if return_value == 404 and contract_expiry_date > far_future_cutoff:
        LOGGER.info("cboe.get_historical_csv(): 404 Page not found, most likely this is ok...")
    elif return_value == 404:
        LOGGER.error("cboe.get_historical_csv(): 404 Page not found, there is something WRONG! %s",
                     total_url)
    return return_value
def run_daily_insert():
    """Fetch today's nominal yield-curve XML from treasury.gov and insert it.

    Downloads the feed filtered to today's date, fixes/validates the XML,
    sftp's it to the mysql server, parses it into a DataFrame, and adds one
    DailyNominalUsTyGovYieldCurve row per entry. Returns early (nothing
    inserted) when the feed is empty, e.g. on weekends.
    """
    TODAY_DATE = datetime.now()
    DAILY_YIELD_CURVE_URL = "https://data.treasury.gov/feed.svc/DailyTreasuryYieldCurveRateData?" \
                            "$filter=day(NEW_DATE)%20eq%20" + str(TODAY_DATE.day) + \
                            "%20and%20month(NEW_DATE)%20eq%20" + str(TODAY_DATE.month) + \
                            "%20and%20year(NEW_DATE)%20eq%20" + str(TODAY_DATE.year)
    url_response = requests.get(url=DAILY_YIELD_CURVE_URL)
    LOGGER.info(
        "daily_nominal_yeild_curve.write_response_file(): HTTP RESPONSE STATUS CODE "
        + str(url_response.status_code))
    daily_tydotgov_nominal_yields_xml_file = (
        OSMuxImpl.get_proper_path(LOCAL_NOMINAL_YIELD_CURVE_XML_LOC) +
        "DailyTreasuryYieldCurveRateDataToday.xml")
    with open(daily_tydotgov_nominal_yields_xml_file, 'wb') as f:
        f.write(url_response.content)
    # check empty xml
    entry_count = check_empty_xml(daily_tydotgov_nominal_yields_xml_file)
    # Bug fix: was `is 0` (identity comparison, a SyntaxWarning on modern
    # CPython); value equality is what is intended.
    if entry_count == 0:
        LOGGER.error(
            "nominal_yield_curve:run_daily_insert(): the xml file %s is empty, nothing to insert",
            daily_tydotgov_nominal_yields_xml_file)
        return
    fix_invalid_xml(daily_tydotgov_nominal_yields_xml_file)
    LOGGER.info(
        "nominal_yield_curve.daily_insert(): ftp'ing into mysql server xml file %s",
        daily_tydotgov_nominal_yields_xml_file)
    sftp_xml(daily_tydotgov_nominal_yields_xml_file)
    LOGGER.info(
        "nominal_yield_curve.daily_insert(): running parse_xml() on %s ",
        daily_tydotgov_nominal_yields_xml_file)
    df_daily = pyparse_xml(daily_tydotgov_nominal_yields_xml_file)
    yc_entries = df_daily.apply(vectorized_insert_from_pyparse, axis=1)
    session = get_db_session()
    # Bug fix: Series.iteritems() was removed in pandas 2.0; items() is the
    # long-standing equivalent.
    for yc_entry in yc_entries.items():
        LOGGER.info("nominal_yield_curve:inserting yield curve entry %s %s",
                    yc_entry[0], yc_entry[1])
        session.add(yc_entry[1])
    session.commit()
import seaborn as sns import matplotlib.pyplot as plt from matplotlib import rcParams import numpy as np from root.nested import get_logger sns.set() rcParams.update({'figure.autolayout': True}) LOGGER = get_logger() YIELD_DIFF_RETS_PERIODS = [1, 2, 5, 10, 20, 40, 60, 120, 252] PAR_BOND_PRICE = 1000.0 TRACK_INDEX_NOMINAL_POINT_YIELD_SPREAD_CSV = "workspace/data/treasurygov/analysis/" HTML_FILES_DIR = "workspace/data/bokeh/html/" SEABORN_PNG_FILES_DIR = "/workspace/data/seaborn/png/" TRACK_INDEX_NOMINAL_POINT_YIELD_SPREAD_CSV = \ OSMuxImpl.get_proper_path(TRACK_INDEX_NOMINAL_POINT_YIELD_SPREAD_CSV) + "tpyls.csv" """ Get bond price from YTM """ def bond_price(par, T, ytm, coup, freq=2): """ BROKEN FUNCTION, DO NOT USE! :param par: :param T: :param ytm: :param coup: :param freq: :return: """ freq = float(freq) periods = T * freq
def correlation_heatmaps(df, duration_point, risk_asset_list):
    """Build correlation matrices between risk-off (Treasury duration point)
    return columns and risk-on asset return columns of *df*, render one as a
    seaborn heatmap PNG, and feed the top correlations to make_spread_price().

    :param df: DataFrame whose columns include per-period DIFF / PCT_CHANGE
        series named like '<label>_<N>D_DIFF' (see construction below).
    :param duration_point: iterable of duration points; each is mapped to a
        DB column label via nominal_yield_curve.DURATION_TO_DBCOL_MAPPING.
    :param risk_asset_list: iterable of risk-on asset column-name prefixes.

    NOTE(review): the log prefix below says make_spread(), not
    correlation_heatmaps() — looks like a stale copy-paste; confirm intent.
    """
    LOGGER.info(
        "yield_curve_risk_pricer.make_spread(): running make_spread() function..."
    )
    col_list = df.columns
    # Candidate risk-off column names, built for every duration point x
    # every look-back period in YIELD_DIFF_RETS_PERIODS.
    risk_off_diffs_list = []
    risk_off_pct_change_s_list = []
    risk_off_derived_px_diffs_list = []
    risk_off_derived_px_pctChange_list = []
    for dp in duration_point:
        label = nominal_yield_curve.DURATION_TO_DBCOL_MAPPING[dp]
        for ydr_per in YIELD_DIFF_RETS_PERIODS:
            risk_off_diffs = label + '_' + str(ydr_per) + 'D_DIFF'
            risk_off_pct_change_s = label + '_' + str(ydr_per) + 'D_PCT_CHANGE'
            risk_off_derived_px_diffs = label + '_UPDATED_PRICE_' + str(
                ydr_per) + 'D_DIFF'
            risk_off_derived_px_pctChange = label + '_UPDATED_PRICE_' + str(
                ydr_per) + 'D_PCT_CHANGE'
            risk_off_diffs_list.append(risk_off_diffs)
            risk_off_pct_change_s_list.append(risk_off_pct_change_s)
            risk_off_derived_px_diffs_list.append(risk_off_derived_px_diffs)
            risk_off_derived_px_pctChange_list.append(
                risk_off_derived_px_pctChange)
    # Keep only candidate names that actually exist as columns in df;
    # sorted() gives a deterministic column order for the corr matrices.
    diffs_intersection_of_lists = \
        list(sorted(set(risk_off_diffs_list).intersection(col_list)))
    pct_change_s_intersection_of_lists = \
        list(sorted(set(risk_off_pct_change_s_list).intersection(col_list)))
    derived_px_diffs_intersection_of_lists = \
        list(sorted(set(risk_off_derived_px_diffs_list).intersection(col_list)))
    derived_px_pctChange_intersection_of_lists = \
        list(sorted(set(risk_off_derived_px_pctChange_list).intersection(col_list)))
    # NOTE(review): the bare `return` inside this loop exits on the FIRST
    # risk asset, so only one iteration ever runs and everything after the
    # return is dead code — presumably a debugging short-circuit; confirm.
    for risk_on in risk_asset_list:
        risk_on_diffs_list = []
        risk_on_pct_change_s_list = []
        for ydr_per in YIELD_DIFF_RETS_PERIODS:
            risk_on_diffs = risk_on + '_' + str(ydr_per) + 'D_DIFF'
            risk_on_pct_change_s = risk_on + '_' + str(
                ydr_per) + 'D_PCT_CHANGE'
            risk_on_diffs_list.append(risk_on_diffs)
            risk_on_pct_change_s_list.append(risk_on_pct_change_s)
        diffs_intersection_risk_on_list = list(
            sorted(set(risk_on_diffs_list).intersection(col_list)))
        pct_change_s_intersection_risk_on_list = list(
            sorted(set(risk_on_pct_change_s_list).intersection(col_list)))
        # Pairwise correlation matrices: each combines one family of
        # risk-off columns with one family of risk-on columns.
        corr_matrix_diffRiskOff_diffRiskOn = \
            df[diffs_intersection_of_lists + diffs_intersection_risk_on_list].corr()
        corr_matrix_diffRiskOff_pctChangeRiskOn = \
            df[diffs_intersection_of_lists + pct_change_s_intersection_risk_on_list].corr()
        corr_matrix_pctChangeRiskOff_diffRiskOn = \
            df[pct_change_s_intersection_of_lists + diffs_intersection_risk_on_list].corr()
        corr_matrix_pctChangeRiskOff_pctChangeRiskOn = \
            df[pct_change_s_intersection_of_lists + pct_change_s_intersection_risk_on_list].corr()
        # derived px diff/pct_change versus risk-on diff/pctChange
        corr_matrix_derivedPxDiffRiskOff_diffRiskOn = \
            df[derived_px_diffs_intersection_of_lists + diffs_intersection_risk_on_list].corr()
        corr_matrix_derivedPxDiffRiskOff_pctChangeRiskOn = \
            df[derived_px_diffs_intersection_of_lists + pct_change_s_intersection_risk_on_list].corr()
        corr_matrix_derivedPxPctChangeRiskOff_diffRiskOn = \
            df[derived_px_pctChange_intersection_of_lists + diffs_intersection_risk_on_list].corr()
        corr_matrix_derivedPxPctChangeRiskOff_pctChangeRiskOn = \
            df[derived_px_pctChange_intersection_of_lists + pct_change_s_intersection_risk_on_list].corr()
        # Render the derived-price-pctChange vs risk-on-pctChange matrix,
        # save it as a PNG, and trade the top correlations.
        plt.figure(figsize=(27, 18))
        LOGGER.info(
            "yield_curve_risk_pricer.correlation_heatmap(): "
            "corr_matrix_derivedPxPctChangeRiskOff_pctChangeRiskOn")
        ax = sns.heatmap(
            corr_matrix_derivedPxPctChangeRiskOff_pctChangeRiskOn,
            annot=True,
            linewidths=0.5)
        ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=12)
        ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=12)
        ax.figure.savefig(
            OSMuxImpl.get_proper_path(SEABORN_PNG_FILES_DIR) +
            "corr_matrix_derivedPxPctChangeRiskOff_pctChangeRiskOn.png")
        # plt.tight_layout()
        top_correl_df = find_top_correlations(
            corr_matrix_derivedPxPctChangeRiskOff_pctChangeRiskOn)
        print(top_correl_df)
        make_spread_price(top_correl_df, df)
        return
        # --- DEAD CODE from here to end of function (after the return
        # above): seven more heatmap renderings that are never executed. ---
        plt.figure(figsize=(27, 18))
        LOGGER.info(
            "yield_curve_risk_pricer.correlation_heatmap(): corr_matrix_diffRiskOff_diffRiskOn"
        )
        ax = sns.heatmap(corr_matrix_diffRiskOff_diffRiskOn,
                         annot=True,
                         linewidths=0.5)
        ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=12)
        ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=12)
        ax.figure.savefig(
            OSMuxImpl.get_proper_path(SEABORN_PNG_FILES_DIR) +
            "corr_matrix_diffRiskOff_diffRiskOn.png")
        top_correl_df = find_top_correlations(
            corr_matrix_diffRiskOff_diffRiskOn)
        make_spread_price(top_correl_df, df)
        plt.figure(figsize=(27, 18))
        LOGGER.info(
            "yield_curve_risk_pricer.correlation_heatmap(): corr_matrix_diffRiskOff_pctChangeRiskOn"
        )
        ax = sns.heatmap(corr_matrix_diffRiskOff_pctChangeRiskOn,
                         annot=True,
                         linewidths=0.5)
        ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=12)
        ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=12)
        # plt.tight_layout()
        ax.figure.savefig(
            OSMuxImpl.get_proper_path(SEABORN_PNG_FILES_DIR) +
            "corr_matrix_diffRiskOff_pctChangeRiskOn.png")
        find_top_correlations(corr_matrix_diffRiskOff_pctChangeRiskOn)
        plt.figure(figsize=(27, 18))
        LOGGER.info(
            "yield_curve_risk_pricer.correlation_heatmap(): corr_matrix_pctChangeRiskOff_diffRiskOn"
        )
        ax = sns.heatmap(corr_matrix_pctChangeRiskOff_diffRiskOn,
                         annot=True,
                         linewidths=0.5)
        ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=12)
        ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=12)
        # plt.tight_layout()
        ax.figure.savefig(
            OSMuxImpl.get_proper_path(SEABORN_PNG_FILES_DIR) +
            "corr_matrix_pctChangeRiskOff_diffRiskOn.png")
        find_top_correlations(corr_matrix_pctChangeRiskOff_diffRiskOn)
        plt.figure(figsize=(27, 18))
        LOGGER.info(
            "yield_curve_risk_pricer.correlation_heatmap(): corr_matrix_pctChangeRiskOff_pctChangeRiskOn"
        )
        ax = sns.heatmap(corr_matrix_pctChangeRiskOff_pctChangeRiskOn,
                         annot=True,
                         linewidths=0.5)
        ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=12)
        ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=12)
        # plt.tight_layout()
        ax.figure.savefig(
            OSMuxImpl.get_proper_path(SEABORN_PNG_FILES_DIR) +
            "corr_matrix_pctChangeRiskOff_pctChangeRiskOn.png")
        find_top_correlations(corr_matrix_pctChangeRiskOff_pctChangeRiskOn)
        plt.figure(figsize=(27, 18))
        LOGGER.info(
            "yield_curve_risk_pricer.correlation_heatmap(): corr_matrix_derivedPxDiffRiskOff_diffRiskOn"
        )
        ax = sns.heatmap(corr_matrix_derivedPxDiffRiskOff_diffRiskOn,
                         annot=True,
                         linewidths=0.5)
        ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=12)
        ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=12)
        ax.figure.savefig(
            OSMuxImpl.get_proper_path(SEABORN_PNG_FILES_DIR) +
            "corr_matrix_derivedPxDiffRiskOff_diffRiskOn.png")
        # plt.tight_layout()
        find_top_correlations(corr_matrix_derivedPxDiffRiskOff_diffRiskOn)
        plt.figure(figsize=(27, 18))
        LOGGER.info("yield_curve_risk_pricer.correlation_heatmap(): "
                    "corr_matrix_derivedPxDiffRiskOff_pctChangeRiskOn")
        ax = sns.heatmap(corr_matrix_derivedPxDiffRiskOff_pctChangeRiskOn,
                         annot=True,
                         linewidths=0.5)
        ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=12)
        ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=12)
        ax.figure.savefig(
            OSMuxImpl.get_proper_path(SEABORN_PNG_FILES_DIR) +
            "corr_matrix_derivedPxDiffRiskOff_pctChangeRiskOn"
            ".png")
        # plt.tight_layout()
        find_top_correlations(
            corr_matrix_derivedPxDiffRiskOff_pctChangeRiskOn)
        plt.figure(figsize=(27, 18))
        LOGGER.info("yield_curve_risk_pricer.correlation_heatmap(): "
                    "corr_matrix_derivedPxPctChangeRiskOff_diffRiskOn")
        ax = sns.heatmap(corr_matrix_derivedPxPctChangeRiskOff_diffRiskOn,
                         annot=True,
                         linewidths=0.5)
        ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=12)
        ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=12)
        # NOTE(review): filename below says "derivedPxChangeRiskOff" while
        # the matrix is derivedPxPctChange... — likely a typo ("Pct" lost);
        # left untouched because it is a runtime string.
        ax.figure.savefig(
            OSMuxImpl.get_proper_path(SEABORN_PNG_FILES_DIR) +
            "corr_matrix_derivedPxChangeRiskOff_diffRiskOn.png")
        # plt.tight_layout()
        find_top_correlations(
            corr_matrix_derivedPxPctChangeRiskOff_diffRiskOn)
def __init__(self):
    """Resolve local file paths for each stock-universe holdings CSV and
    record each file's last-modified time (or "" when the file does not
    exist yet), plus the per-universe download functions.

    Populates:
      self.last_modified_times        -- universe name -> mtime or ""
      self.stock_universe_download_func -- universe name -> download callable
    """
    self.logger = get_logger()
    local_misc_data_path = '/workspace/data/'
    self.local_stock_universe_file_pwd = OSMuxImpl.get_proper_path(
        local_misc_data_path)
    stock_universe_file_name = 'IWB_holdings'
    self.stock_universe_file_type = '.csv'
    self.total_pwd_stock_universe_file = self.local_stock_universe_file_pwd + \
        stock_universe_file_name + self.stock_universe_file_type
    try:
        last_time_stock_universe_file_modified = os.path.getmtime(
            self.total_pwd_stock_universe_file)
    except FileNotFoundError:
        # "" marks "never downloaded"; downstream code checks this sentinel.
        last_time_stock_universe_file_modified = ""
    russell_1000_stock_universe = 'Russ1K_holdings'
    self.total_pwd_russell1000 = self.local_stock_universe_file_pwd + \
        russell_1000_stock_universe + self.stock_universe_file_type
    try:
        last_time_russell_1000_stock_universe_file_modified = os.path.getmtime(
            self.total_pwd_russell1000)
    except FileNotFoundError:
        last_time_russell_1000_stock_universe_file_modified = ""
    russell_3000_stock_universe = 'Russ3K_holdings'
    self.total_pwd_russell3000 = self.local_stock_universe_file_pwd + \
        russell_3000_stock_universe + self.stock_universe_file_type
    try:
        last_time_russell_3000_stock_universe_file_modified = os.path.getmtime(
            self.total_pwd_russell3000)
    except FileNotFoundError:
        last_time_russell_3000_stock_universe_file_modified = ""
    nq_100_stock_universe = 'NQ100'
    self.total_pwd_nq100 = self.local_stock_universe_file_pwd + \
        nq_100_stock_universe + self.stock_universe_file_type
    try:
        last_time_nq_100_stock_universe_file_modified = os.path.getmtime(
            self.total_pwd_nq100)
    except FileNotFoundError:
        last_time_nq_100_stock_universe_file_modified = ""
    sp_500_stock_universe = 'SP500'
    self.total_pwd_sp500 = self.local_stock_universe_file_pwd + \
        sp_500_stock_universe + self.stock_universe_file_type
    try:
        # BUGFIX: was os.path.getmtime(self.total_pwd_nq100) — copy-paste
        # error that stamped the SP500 universe with the NQ100 file's mtime.
        last_time_sp_500_stock_universe_file_modified = os.path.getmtime(
            self.total_pwd_sp500)
    except FileNotFoundError:
        last_time_sp_500_stock_universe_file_modified = ""
    dow_30_stock_universe = 'DOW30'
    self.total_pwd_dow30 = self.local_stock_universe_file_pwd + \
        dow_30_stock_universe + self.stock_universe_file_type
    try:
        # BUGFIX: was os.path.getmtime(self.total_pwd_nq100) — same
        # copy-paste error for the DOW30 universe.
        last_time_dow_30_stock_universe_file_modified = os.path.getmtime(
            self.total_pwd_dow30)
    except FileNotFoundError:
        last_time_dow_30_stock_universe_file_modified = ""
    russ_2k_stock_universe = 'R2K'
    self.total_pwd_r2k = self.local_stock_universe_file_pwd + \
        russ_2k_stock_universe + self.stock_universe_file_type
    try:
        last_time_r2k_stock_universe_file_modified = os.path.getmtime(
            self.total_pwd_r2k)
    except FileNotFoundError:
        last_time_r2k_stock_universe_file_modified = ""
    self.last_modified_times = {
        stock_universe_file_name:
        last_time_stock_universe_file_modified,
        russell_3000_stock_universe:
        last_time_russell_3000_stock_universe_file_modified,
        russell_1000_stock_universe:
        last_time_russell_1000_stock_universe_file_modified,
        russ_2k_stock_universe:
        last_time_r2k_stock_universe_file_modified,
        nq_100_stock_universe:
        last_time_nq_100_stock_universe_file_modified,
        sp_500_stock_universe:
        last_time_sp_500_stock_universe_file_modified,
        dow_30_stock_universe:
        last_time_dow_30_stock_universe_file_modified
    }
    self.stock_universe_download_func = {
        stock_universe_file_name: self.download_stock_universe,
        russell_1000_stock_universe:
        self.download_russell_1000_stock_universe,
        russell_3000_stock_universe:
        self.download_russell_3000_stock_universe,
        russ_2k_stock_universe: self.download_r2k_holdings_symbol_list,
        nq_100_stock_universe: self.download_nq100_holdings_symbol_list,
        sp_500_stock_universe: self.download_sp500_holdings_symbol_list,
        dow_30_stock_universe: self.download_dow30_holdings_symbol_list
    }
def rolling_eurodollar_os_sl_corr(
        self,
        ir_class="EURODOLLARS",
        contract='ED4_WHITE',
        observation_start='2014-06-01',
        observation_end=None,
        which_lag=1,
        rolling_window_size=60,
        rolling_pnl_window_size=90,
        execution_slippage=-0.0025,
        min_input_vol=0.0,
        pos_correl_filter_val=0.2,
        neg_correl_filter_val=-0.2):
    """Backtest intraday mean-reversion signals on a Eurodollar contract.

    Computes open/settle/last/next-open deltas, rolling correlations between
    them, rolling PnL of the resulting trade selections, position indicators
    filtered by correlation thresholds, and renders matplotlib/Bokeh plots.

    :param ir_class: Quandl data class passed to QuandlDataObject.
    :param contract: contract code, e.g. 'ED4_WHITE'.
    :param observation_start: slice start date 'YYYY-MM-DD'.
    :param observation_end: slice end date 'YYYY-MM-DD'; defaults to today.
    :param which_lag: how many rows ahead "next open" is taken from.
    :param rolling_window_size: window for rolling correlations.
    :param rolling_pnl_window_size: window for rolling PnL sums.
    :param execution_slippage: per-trade slippage applied to final PnL.
    :param min_input_vol: minimum input move (in price units) to trade.
    :param pos_correl_filter_val / neg_correl_filter_val: correlation
        thresholds used to build the position filter.
    """
    # BUGFIX: the default was pd.datetime.now().strftime('%Y-%m-%d') —
    # (a) the pandas.datetime alias was removed in pandas 2.0, and (b) a
    # default argument is evaluated once at definition time, freezing
    # "today" for long-lived processes.  Use a None sentinel instead.
    if observation_end is None:
        observation_end = pd.Timestamp.now().strftime('%Y-%m-%d')
    # default window size is one week,there are two observations per day.
    qdo_eurodollar = QuandlDataObject(ir_class, contract, '.csv')
    ed_df = qdo_eurodollar.get_df()
    ed_df = ed_df[observation_start:observation_end]
    # if nothing traded on a certain day, just drop the row - e.g. Dec 5th, 2018 - market
    # closed for the funeral of George Bush the first.
    ed_df = ed_df[ed_df.Volume > 0.0]
    # Session deltas; "NextOpen" columns look which_lag rows ahead.
    ed_df['OpenSettleDelta'] = ed_df.Settle - ed_df.Open
    ed_df['OpenLastDelta'] = ed_df.Last - ed_df.Open
    ed_df['SettleLastDelta'] = ed_df.Last - ed_df.Settle
    ed_df['SettleNextOpenDelta'] = ed_df.Open.shift(
        periods=-which_lag) - ed_df.Settle
    ed_df['LastNextOpenDelta'] = ed_df.Open.shift(
        periods=-which_lag) - ed_df.Last
    # Scale to integers to avoid float-comparison noise around the
    # min_input_vol threshold.
    conditions = [
        (pd.to_numeric(ed_df.OpenSettleDelta.mul(1000000.0),
                       downcast='integer') > int(
                           min_input_vol * 1000000.0)),  # one tick = 5000
        (pd.to_numeric(ed_df.OpenSettleDelta.mul(1000000.0),
                       downcast='integer') < int(
                           -min_input_vol * 1000000.0))
    ]  # one tick = 5000
    ol_delta_conditions = [
        (pd.to_numeric(ed_df.OpenLastDelta.mul(1000000.0),
                       downcast='integer') > int(
                           min_input_vol * 1000000.0)),  # one tick = 5000
        (pd.to_numeric(ed_df.OpenLastDelta.mul(1000000.0),
                       downcast='integer') < int(
                           -min_input_vol * 1000000.0))
    ]  # one tick = 5000
    ## the below, 1.0 or -1.0 multiples, tells us whether we expect reversion in next period,
    ## or autocorrelation. (-1.0,1.0) = reversion, (1.0, -1.0) = autocorrelation
    choices_settle_last = [
        ed_df.SettleLastDelta.mul(1.0),
        ed_df.SettleLastDelta.mul(-1.0)
    ]
    choices_settle_nextopen = [
        ed_df.SettleNextOpenDelta.mul(1.0),
        ed_df.SettleNextOpenDelta.mul(-1.0)
    ]
    choices_last_nextopen = [
        ed_df.LastNextOpenDelta.mul(1.0),
        ed_df.LastNextOpenDelta.mul(-1.0)
    ]
    ed_df['SettleLastTradeSelect'] = np.select(conditions,
                                               choices_settle_last,
                                               default=0.0)
    ed_df['SettleNextOpenTradeSelect'] = np.select(conditions,
                                                   choices_settle_nextopen,
                                                   default=0.0)
    ed_df['LastNextOpenTradeSelect'] = np.select(ol_delta_conditions,
                                                 choices_last_nextopen,
                                                 default=0.0)
    # Rolling correlations between the input delta and the follow-on delta.
    ed_df['os_sl_corr_series'] = ed_df.OpenSettleDelta.rolling(
        rolling_window_size).corr(ed_df.SettleLastDelta)
    ed_df['os_snxto_corr_series'] = ed_df.OpenSettleDelta.rolling(
        rolling_window_size).corr(ed_df.SettleNextOpenDelta)
    ed_df['ol_lnxto_corr_series'] = ed_df.OpenLastDelta.rolling(
        rolling_window_size).corr(ed_df.LastNextOpenDelta)
    # Rolling PnL sums (div(0.005) converts price to half-ticks) and their
    # forward-looking counterparts (shifted so the window ends "now").
    ed_df['rolling_reversion_trade_pnl'] = ed_df.SettleLastTradeSelect.rolling(rolling_pnl_window_size).\
        sum().div(0.005)
    ed_df['fwd_looking_rolling_reversion_trade_pnl'] = ed_df.rolling_reversion_trade_pnl.\
        shift(-1*rolling_pnl_window_size+1)
    ed_df['rolling_reversion_settleNextOpen_trade_pnl'] = ed_df.SettleNextOpenTradeSelect.\
        rolling(rolling_pnl_window_size).sum().div(0.005)
    ed_df['fwd_looking_rolling_reversion_settleNextOpen_trade_pnl'] = ed_df.rolling_reversion_settleNextOpen_trade_pnl.\
        shift(-1*rolling_pnl_window_size+1)
    ed_df['rolling_reversion_lastNextOpen_trade_pnl'] = ed_df.LastNextOpenTradeSelect.\
        rolling(rolling_pnl_window_size).sum().div(0.005)
    ed_df['fwd_looking_rolling_reversion_lastNextOpen_trade_pnl'] = ed_df.rolling_reversion_lastNextOpen_trade_pnl.\
        shift(-1*rolling_pnl_window_size+1)
    ed_df['os_sl_lagged_corr_series'] = ed_df.os_sl_corr_series.shift(
        periods=1)
    ed_df[
        'os_snxto_lagged_corr_series'] = ed_df.os_snxto_corr_series.shift(
            periods=1)
    ed_df[
        'ol_lnxto_lagged_corr_series'] = ed_df.ol_lnxto_corr_series.shift(
            periods=1)
    # create filter conditions for correlation
    correl_filter = {}
    correl_filter['os_sl'] = [(operator.gt, pos_correl_filter_val),
                              (operator.lt, neg_correl_filter_val),
                              operator.or_]
    correl_filter['os_snxto'] = [(operator.gt, pos_correl_filter_val),
                                 (operator.lt, neg_correl_filter_val),
                                 operator.or_]
    correl_filter['ol_lnxto'] = [(operator.gt, pos_correl_filter_val),
                                 (operator.lt, neg_correl_filter_val),
                                 operator.or_]
    # Per-row (os_sl, os_snxto, ol_lnxto) position-indicator tuple.
    pos_ind_series = ed_df.apply(
        self.rolling_eurodollar_session_corr_pos_ind,
        args=(correl_filter, ),
        axis=1)
    ed_df['os_sl_pos_ind'] = pos_ind_series.apply(
        lambda pos_ind_tuple: pos_ind_tuple[0])
    ed_df['os_snxto_pos_ind'] = pos_ind_series.apply(
        lambda pos_ind_tuple: pos_ind_tuple[1])
    ed_df['ol_lnxto_pos_ind'] = pos_ind_series.apply(
        lambda pos_ind_tuple: pos_ind_tuple[2])
    np_os_sl_pos_ind = ed_df.os_sl_pos_ind.values
    np_os_snxto_pos_ind = ed_df.os_snxto_pos_ind.values
    np_ol_lnxto_pos_ind = ed_df.ol_lnxto_pos_ind.values
    # Smear each indicator forward over the PnL window (clipped at the
    # series end), then zero-pad each smeared run back to full length so
    # the per-row arrays can be summed column-wise.
    np_array_list = [
        np.repeat(
            pos_ind,
            np.min([
                rolling_pnl_window_size,
                len(np_os_sl_pos_ind) - item_idx
            ])) for item_idx, pos_ind in enumerate(np_os_sl_pos_ind)
    ]
    final_np_array_list = [
        np.append(
            np.append(
                np.repeat(
                    0, np.min([item_idx, len(np_array_list) - len(npa)])),
                np.array(npa)),
            np.repeat(
                0, np.max([len(np_array_list) - (item_idx + len(npa)), 0])))
        for item_idx, npa in enumerate(np_array_list)
    ]
    np_os_snxto_array_list = [
        np.repeat(
            pos_ind,
            np.min([
                rolling_pnl_window_size,
                len(np_os_snxto_pos_ind) - item_idx
            ])) for item_idx, pos_ind in enumerate(np_os_snxto_pos_ind)
    ]
    final_np_os_snxto_array_list = [
        np.append(
            np.append(
                np.repeat(
                    0,
                    np.min(
                        [item_idx, len(np_os_snxto_array_list) - len(npa)])),
                np.array(npa)),
            np.repeat(
                0,
                np.max([
                    len(np_os_snxto_array_list) - (item_idx + len(npa)), 0
                ]))) for item_idx, npa in enumerate(np_os_snxto_array_list)
    ]
    np_ol_lnxto_array_list = [
        np.repeat(
            pos_ind,
            np.min([
                rolling_pnl_window_size,
                len(np_ol_lnxto_pos_ind) - item_idx
            ])) for item_idx, pos_ind in enumerate(np_ol_lnxto_pos_ind)
    ]
    final_np_ol_lnxto_array_list = [
        np.append(
            np.append(
                np.repeat(
                    0,
                    np.min(
                        [item_idx, len(np_ol_lnxto_array_list) - len(npa)])),
                np.array(npa)),
            np.repeat(
                0,
                np.max([
                    len(np_ol_lnxto_array_list) - (item_idx + len(npa)), 0
                ]))) for item_idx, npa in enumerate(np_ol_lnxto_array_list)
    ]
    self.logger.info(
        "FredAPI:rolling_eurodollar_os_sl_corr(): final_np_array list dimensions are %s",
        np.array(final_np_array_list).shape)
    os_sl_total_pos_ind = np.sum(np.array(final_np_array_list), axis=0)
    os_snxto_total_pos_ind = np.sum(np.array(final_np_os_snxto_array_list),
                                    axis=0)
    ol_lnxto_total_pos_ind = np.sum(np.array(final_np_ol_lnxto_array_list),
                                    axis=0)
    ed_df['os_sl_total_pos_ind'] = pd.Series(os_sl_total_pos_ind,
                                             index=ed_df.index)
    ed_df['os_snxto_total_pos_ind'] = pd.Series(os_snxto_total_pos_ind,
                                                index=ed_df.index)
    ed_df['ol_lnxto_total_pos_ind'] = pd.Series(ol_lnxto_total_pos_ind,
                                                index=ed_df.index)
    # Final PnL: trade selection scaled by total position, minus slippage
    # proportional to absolute position size.
    ed_df['FinalSettleLastTradeSelect'] = ed_df['SettleLastTradeSelect'].mul(ed_df['os_sl_total_pos_ind']).\
        add(ed_df['os_sl_total_pos_ind'].abs().mul(execution_slippage))
    ed_df['FinalSettleNextOpenTradeSelect'] = ed_df['SettleNextOpenTradeSelect'].\
        mul(ed_df['os_snxto_total_pos_ind']).add(ed_df['os_snxto_total_pos_ind'].abs().mul(execution_slippage))
    ed_df['FinalLastNextOpenTradeSelect'] = ed_df['LastNextOpenTradeSelect'].\
        mul(ed_df['ol_lnxto_total_pos_ind']).add(ed_df['ol_lnxto_total_pos_ind'].abs().mul(execution_slippage))
    # Interactive matplotlib summaries (blocking plt.show() calls).
    ed_df.os_sl_total_pos_ind.plot(
        title='Open-Settle/Settle-Last Total Pos Ind')
    plt.show()
    ed_df.os_snxto_total_pos_ind.plot(
        title='Open-Settle/Settle-NextOpen Total Pos Ind')
    plt.show()
    ed_df.ol_lnxto_total_pos_ind.plot(
        title='Open-Last/Last-NextOpen Total Pos Ind')
    plt.show()
    ed_df.FinalSettleLastTradeSelect.cumsum().plot(
        title='Settle-to-Last Cumm Pnl')
    plt.show()
    ed_df.FinalSettleNextOpenTradeSelect.cumsum().plot(
        title='Settle-to-NextOpen Cumm Pnl')
    plt.show()
    ed_df.FinalLastNextOpenTradeSelect.cumsum().plot(
        title='Last-to-NextOpen Cumm Pnl')
    plt.show()
    # NOTE(review): hard-coded user-specific dump path — consider routing
    # through OSMuxImpl like the HTML output below.
    ed_df.to_csv('/Users/traderghazy/workspace/data/ed_df.csv')
    data = ed_df[[
        'os_sl_lagged_corr_series', 'os_snxto_lagged_corr_series',
        'ol_lnxto_lagged_corr_series', 'SettleLastTradeSelect',
        'SettleNextOpenTradeSelect', 'LastNextOpenTradeSelect',
        'rolling_reversion_trade_pnl',
        'fwd_looking_rolling_reversion_trade_pnl',
        'rolling_reversion_settleNextOpen_trade_pnl',
        'fwd_looking_rolling_reversion_settleNextOpen_trade_pnl',
        'rolling_reversion_lastNextOpen_trade_pnl',
        'fwd_looking_rolling_reversion_lastNextOpen_trade_pnl'
    ]].dropna()
    # the correl_filter is the conditions for filtering the correlations.
    # Make sure the last item in that list is either operator.and_ or
    # operator.or_ ... this tells the filter how to combine the conditions.
    p_scat_1, p_scat_2, p_scat_3, p_correl_line = ExtendBokeh.bokeh_ed_ir_rolling_ticks_correl(
        data,
        title=[
            'OS-SL Rolling Cum. Sum vs. Correl',
            'OS-SL Rolling Fwd Cum. Sum vs. Correl',
            'OS-SL Point Value vs. Correl',
            'OS-SL Correlation vs. Datetime'
        ],
        subtitle=['', '', '', ''],
        diff_types_to_correlate='os_sl',
        type_list=[
            'rolling_reversion_trade_pnl',
            'fwd_looking_rolling_reversion_trade_pnl',
            'SettleLastTradeSelect', 'os_sl_lagged_corr_series'
        ],
        rolling_window_size=rolling_window_size,
        correl_filter=correl_filter)
    p_scat_4, p_scat_5, p_scat_6, p_os_snxto_correl_line = ExtendBokeh.\
        bokeh_ed_ir_rolling_ticks_correl(data,
                                         title=['OS-SNXTO Rolling Cum. Sum vs. Correl',
                                                'OS-SNXTO Rolling Fwd Cum. Sum vs. Correl',
                                                'OS-SNXTO Point Value vs. Correl',
                                                'OS-SNXTO Correlation vs. Datetime'],
                                         subtitle=['', '', '', ''],
                                         diff_types_to_correlate='os_snxto',
                                         type_list=['rolling_reversion_settleNextOpen_trade_pnl',
                                                    'fwd_looking_rolling_reversion_settleNextOpen_trade_pnl',
                                                    'SettleNextOpenTradeSelect',
                                                    'os_snxto_lagged_corr_series'],
                                         rolling_window_size=rolling_window_size,
                                         correl_filter=correl_filter)
    p_scat_7, p_scat_8, p_scat_9, p_ol_lnxto_correl_line = ExtendBokeh.\
        bokeh_ed_ir_rolling_ticks_correl(data,
                                         title=['OL-LNXTO Rolling Cum. Sum vs. Correl',
                                                'OL-LNXTO Rolling Fwd Cum. Sum vs. Correl',
                                                'OL-LNXTO Point Value vs. Correl',
                                                'OL-LNXTO Correlation vs. Datetime'],
                                         subtitle=['', '', '', ''],
                                         diff_types_to_correlate='ol_lnxto',
                                         type_list=['rolling_reversion_lastNextOpen_trade_pnl',
                                                    'fwd_looking_rolling_reversion_lastNextOpen_trade_pnl',
                                                    'LastNextOpenTradeSelect',
                                                    'ol_lnxto_lagged_corr_series'],
                                         rolling_window_size=rolling_window_size,
                                         correl_filter=correl_filter)
    the_plots = [
        p_scat_1, p_scat_2, p_scat_3, p_correl_line, p_scat_4, p_scat_5,
        p_scat_6, p_os_snxto_correl_line, p_scat_7, p_scat_8, p_scat_9,
        p_ol_lnxto_correl_line
    ]
    html_output_file_path = OSMuxImpl.get_proper_path(
        '/workspace/data/bokeh/html/')
    html_output_file_title = ir_class + '_' + contract + ".scatter.html"
    html_output_file = html_output_file_path + html_output_file_title
    ExtendBokeh.show_hist_plots(the_plots, html_output_file,
                                html_output_file_title)
# NOTE(review): the three lines below are the tail of a method whose `def`
# lies before this chunk (it builds a Bokeh Document from `layout`).
doc = Document()
doc.add_root(layout)
return doc

@staticmethod
def validate_show_document(html_document,
                           html_filename,
                           html_dir,
                           viewHtml=False):
    """Validate a Bokeh Document, write it as a standalone HTML file, and
    optionally open it in the default browser.

    :param html_document: bokeh Document; .validate() raises on problems.
    :param html_filename: output file name (no directory component).
    :param html_dir: logical directory, resolved via OSMuxImpl.
    :param viewHtml: when truthy, open the written file with bokeh's view().
    """
    html_document.validate()
    proper_dir = OSMuxImpl.get_proper_path(html_dir)
    proper_filename = proper_dir + html_filename
    with open(proper_filename, "w", encoding='utf-8') as f:
        f.write(file_html(html_document, INLINE, "Data Tables"))
    LOGGER.info(
        "extend_bokeh_datatables.ExtendBokeh.validate_show_document(): wrote %s in dir %s ",
        html_filename, proper_dir)
    # NOTE(review): `is not False` is an identity check against a literal —
    # a plain truthiness test (`if viewHtml:`) is the idiomatic form, but
    # they differ for falsy non-False values (e.g. 0); confirm before changing.
    if viewHtml is not False:
        view(proper_filename)

if __name__ == "__main__":
    # Demo/smoke-test entry point: build the example datatable document,
    # validate it, write it to the bokeh html directory, and open it.
    doc = ExtendDataTable.make_example_datatable()
    doc.validate()
    dir_name = "workspace/data/bokeh/html/"
    filename = OSMuxImpl.get_proper_path(dir_name) + "data_tables.html"
    with open(filename, "w", encoding='utf-8') as f:
        f.write(file_html(doc, INLINE, "Data Tables"))
    print("Wrote %s" % filename)
    view(filename)