Example #1
def get_xsd():
    # GHAZY MAHJUB: 5/1/2020: Could not get urllib3 to work, getting certificate errors with pyopenssl
    #               Reverted to using requests library.
    # http = urllib3.PoolManager()
    response = requests.get(REMOTE_XSD_LOCATION)
    # response = http.urlopen("GET", REMOTE_XSD_LOCATION)
    LOGGER.info(
        "daily_nominal_yeild_curve.get_xsd(): HTTP RESPONSE STATUS CODE " +
        str(response.status_code))
    zip_content = response.content
    zip_timestamp = datetime.now().isoformat()
    local_nominal_yield_curve_xsd_zip_loc_full_path = OSMuxImpl.get_proper_path(
        LOCAL_NOMINAL_YIELD_CURVE_XSD_ZIP_LOC)
    with open(
            local_nominal_yield_curve_xsd_zip_loc_full_path +
            "DailyTreasuryYieldCurveRateData" + str(zip_timestamp) + ".zip",
            'wb') as f:
        f.write(zip_content)

    xsd_zip_file = ZipFile(local_nominal_yield_curve_xsd_zip_loc_full_path +
                           "DailyTreasuryYieldCurveRateData" +
                           str(zip_timestamp) + ".zip")
    local_nominal_yield_curve_xsd_loc = OSMuxImpl.get_proper_path(
        LOCAL_NOMINAL_YIELD_CURVE_XSD_LOC)
    LOGGER.info(
        "nominal_yield_curve.get_xsd(): extracting zip file into directory %s",
        local_nominal_yield_curve_xsd_loc)
    xsd_zip_file.extractall(path=local_nominal_yield_curve_xsd_loc)
    xsd_zip_file.close()
Example #2
def get_xsd():
    response = requests.get(REMOTE_XSD_LOCATION)
    LOGGER.info(
        "daily_real_yield_curve.get_xsd(): HTTP RESPONSE STATUS CODE " +
        str(response.status_code))
    zip_content = response.content
    zip_timestamp = datetime.now().isoformat()
    local_real_yield_curve_xsd_zip_loc_full_path = OSMuxImpl.get_proper_path(
        LOCAL_REAL_YIELD_CURVE_XSD_ZIP_LOC)
    with open(
            local_real_yield_curve_xsd_zip_loc_full_path +
            "DailyTreasuryRealYieldCurveRateData" + str(zip_timestamp) +
            ".zip", 'wb') as f:
        f.write(zip_content)

    xsd_zip_file = ZipFile(local_real_yield_curve_xsd_zip_loc_full_path +
                           "DailyTreasuryRealYieldCurveRateData" +
                           str(zip_timestamp) + ".zip")
    local_real_yield_curve_xsd_loc = OSMuxImpl.get_proper_path(
        LOCAL_REAL_YIELD_CURVE_XSD_LOC)
    LOGGER.info(
        "real_yield_curve.get_xsd(): extracting zip file into directory %s",
        local_real_yield_curve_xsd_loc)
    xsd_zip_file.extractall(path=local_real_yield_curve_xsd_loc +
                            "DailyTreasuryRealYieldCurveRateData.xsd/")
    xsd_zip_file.close()
Example #3
def cboe_selenium_connect():
    driver = webdriver.Chrome('/Users/ghazymahjub/chromedriver/chromedriver')
    driver.get(CBOE_LOGIN_URL)
    driver.find_element_by_id("ContentTop_C022_emailOrUserId").send_keys("*****@*****.**")
    driver.find_element_by_id("ContentTop_C022_Password").send_keys("8adxQBeFF$d!$qp")
    driver.find_element_by_id("ContentTop_C022_btnLogin").click()
    driver.implicitly_wait(15)
    import time
    for url_key in sorted(set(url_dict.keys())):
        LOGGER.info("cboe.cboe_selenium_connect(): sleep for 15 seconds, zzzzzzzzz....")
        time.sleep(15)
        LOGGER.info("cboe.cboe_selenium_connect(): woke up!")
        LOGGER.info("cboe.cboe_selenium_connect(): getting %s", url_dict[url_key][0])
        driver.get(url_dict[url_key][0])
        LOGGER.info("cboe.cboe_selenium_connect(): sleep for 15 seconds, zzzzzzzzz....")
        time.sleep(15)
        LOGGER.info("cboe.cboe_selenium_connect(): woke up!")
        downloaded_file_path = OSMuxImpl.get_proper_path(DOWNLOAD_DATA_DIR) + url_dict[url_key][1]
        move_to_file_path = OSMuxImpl.get_proper_path(LOCAL_CBOE_DATA_DIR) + url_dict[url_key][1]
        try:
            shutil.move(downloaded_file_path, move_to_file_path)
        except FileNotFoundError as fnfe:
            LOGGER.error("cboe.cboe_selenium_connect(): shutil move failed with... %s from file: "
                         "%s, to file: %s", fnfe.__str__(), downloaded_file_path, move_to_file_path)
    driver.quit()
    return
Example #4
    def __init__(self):

        self.logger = get_logger()
        self.source = 'fred'
        self.api_key = SecureKeysAccess.get_vendor_api_key_static(
            vendor=str.upper(self.source))
        self.fred_pwd = OSMuxImpl.get_proper_path('/workspace/data/fred/')
        self.seaborn_plots_pwd = OSMuxImpl.get_proper_path(
            '/workspace/data/seaborn/plots/')
        self.fred = Fred(api_key=self.api_key)
Example #5
    def __init__(self, sec_type_list=['cs', 'et']):

        self.logger = get_logger()
        self.iex_trading_root_url = "https://api.iextrading.com/1.0"
        self.get_symbols_universe_url = "/ref-data/symbols"
        self.sec_type_list = sec_type_list
        self.master_sector_indusry_df = pd.DataFrame(
            columns=['Sector', 'Industry'])
        self.master_sector_industry_file = OSMuxImpl.get_proper_path(
            '/workspace/data/IEX/') + 'master_sector_industry.csv'
        self.iex_html_path = OSMuxImpl.get_proper_path(
            '/workspace/data/IEX/html/')
        self.co_earnings_path = OSMuxImpl.get_proper_path(
            '/workspace/data/IEX/earnings/')
Example #6
    def __init__(
            self,
            stock_universe_filename='Russ3K_holdings',
            use_iex_trading_symbol_universe=False,
            sec_type_list=['cs', 'et'],
            daily_window_sizes=[30, 60, 90, 120, 180, 270],
            weekly_window_sizes=[
                4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52
            ],
            monthly_window_sizes=[3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33,
                                  36]):

        self.logger = get_logger()
        self.sm = StatisticalMoments()
        self.stock_universe_data = None
        self.stock_universe_filename = stock_universe_filename
        self.use_iex_trading_symbol_universe = use_iex_trading_symbol_universe
        self.sec_type_list = sec_type_list
        self.plotly_histograms_dir = OSMuxImpl.get_proper_path(
            '/workspace/data/plotly/histograms/')
        self.daily_window_sizes = daily_window_sizes
        self.weekly_window_sizes = weekly_window_sizes

        self.window_size_dict = {
            'D': daily_window_sizes,
            'W': weekly_window_sizes,
            'M': monthly_window_sizes
        }

        self.index_ticker_dict = {
            'SPY': 'SP500'
        }  #, 'QQQ': 'NQ100', 'IWM': 'Russell2000', 'DIA': 'Dow30'}
        self.index_ticker_df = pd.DataFrame(data=list(
            self.index_ticker_dict.items()),
                                            columns=['Symbol', 'Name'])
Example #7
def read_historical_voltermstruct_csv(eod_run=True, which_product=None):
    """
    Read the historical files into a dataframe and then write out to csv and insert into db.
    The eod_run parameter defaults to True, which means you are not running a historical
    backfill but an actual daily EOD insert. In that case we can insert just a subset of the
    data (e.g. the past 7 days) rather than the entire historical file, which saves a lot of
    time on EOD runs.
    :param which_product:
    :param eod_run:
    :return:
    """
    if which_product is not None:
        prod_list = [which_product]
    else:
        prod_list = url_dict.keys()
    for vol_prod in prod_list:
        move_to_file_path = OSMuxImpl.get_proper_path(LOCAL_CBOE_DATA_DIR) + url_dict[vol_prod][1]
        LOGGER.info("cboe.read_historical_voltermstructure_csv(): loading csv file %s into dataframe",
                    move_to_file_path)
        df = pd.read_csv(move_to_file_path, skiprows=3, parse_dates=True, index_col=0, header=None)
        df.columns = ['Volume', 'd_Open', 'd_High', 'd_Low', 'd_Close']
        df.index.name = 'Date'
        df.drop(['Volume'], axis=1, inplace=True)
        df.to_csv(move_to_file_path + ".clean")
        # store into database table
        if eod_run:
            # reindex using the last 7 days
            last_day_in_df = df.index[-1].to_pydatetime()
            first_day_reindex = last_day_in_df - timedelta(days=7)
            df_ins = df.loc[str(first_day_reindex):]
        else:
            df_ins = df
        vix_term_structure_to_db(df_ins, symbol_to_insert=vol_prod)
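A minimal usage sketch of the eod_run switch described in the docstring above; the 'VXMT' product key is hypothetical and only works if it exists in this module's url_dict:

# daily EOD run: re-inserts only the trailing ~7 days for every product in url_dict
read_historical_voltermstruct_csv(eod_run=True)
# full historical backfill for a single (hypothetical) product key
read_historical_voltermstruct_csv(eod_run=False, which_product='VXMT')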
Example #8
def pyparse_xml(xml_file):
    xsd_file = OSMuxImpl.get_proper_path(LOCAL_NOMINAL_YIELD_CURVE_XSD_LOC) + XSD_ZIP_DIR_PATH + \
               LOCAL_NOMINAL_YIELD_CURVE_XSD
    LOGGER.info("nominal_yield_curve.parse_xml(): Using xsd schema at %s ",
                xsd_file)
    try:
        schema = xmlschema.XMLSchema(xsd_file, validation="strict")
        schema.validate(xml_file)
        if not schema.is_valid(xml_file):
            LOGGER.error(
                "nominal_yield_curve.parse_xml(): %s xml file is NOT validate against schema %s !",
                xml_file, xsd_file)
            LOGGER.error(
                "nominal_yield_curve.parse_xml(): is today a weekend day? empty xml files on weekends, check %s",
                xml_file)
        else:
            LOGGER.info(
                "nominal_yield_curve.parse_xml(): %s xml file is valid against schema %s ! ",
                xml_file, xsd_file)
    except XMLSchemaValidationError:
        LOGGER.error("nominal_yield_curve.pyparse_xml(): validation failed!")
    except XMLSchemaParseError:
        LOGGER.error(
            "nominal_yield_curve.pyparse_xml(): xmlschemaparse error!")

    xt = ElementTree.parse(xml_file)
    root = xt.getroot()
    yc_xml_to_dict = schema.to_dict(xml_file)
    top_level_keys = yc_xml_to_dict.keys()
    entry_yc_xml_list = yc_xml_to_dict['entry']
    return_dict = {}
    for entry_dict in entry_yc_xml_list:
        content_dict = entry_dict['content']
        content_meta_properties = content_dict['m:properties']
        date_of_rate_data = content_meta_properties['d:NEW_DATE']
        sub_return_dict = {}
        for cmp_key in sorted(set(content_meta_properties.keys())):
            act_data_dict = content_meta_properties[cmp_key]
            for add_key in sorted(set(act_data_dict.keys())):
                act_data = act_data_dict[add_key]
                # there is an @m:type key, which is the Entity Data Model data type
                # for example: the d:NEW_DATE key has @m:type Edm.DateTime
                # for example: the d:BC_7YEAR key has @m:type Edm.Double
                # then there is a '$' key, which is the actual data.
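                # illustrative shape (hypothetical values):
                #   content_meta_properties['d:BC_10YEAR'] == {'@m:type': 'Edm.Double', '$': 0.64}
                #   content_meta_properties['d:NEW_DATE'] == {'@m:type': 'Edm.DateTime', '$': '2020-05-01T00:00:00'}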
                # print("cmp_key", cmp_key)
                # print("add_key", add_key)
                # print("act_data", act_data)
                if sub_return_dict.get(cmp_key, None) is None:
                    value_dict = {add_key: act_data}
                    sub_return_dict[cmp_key] = value_dict
                else:
                    value_dict = sub_return_dict.get(cmp_key)
                    value_dict[add_key] = act_data
                    # we don't need the below since it's a pointer.
                    sub_return_dict[cmp_key] = value_dict
                return_dict[date_of_rate_data['$']] = sub_return_dict
    df = pd.DataFrame.from_dict(data=return_dict, orient='index')
    return df
Example #9
def run_month_insert(month=0, year=0):
    if month == 0:
        month = datetime.now().month
    if year == 0:
        year = datetime.now().year
    MONTH_YIELD_CURVE_URL = "https://data.treasury.gov/feed.svc/DailyTreasuryYieldCurveRateData?" \
                            "$filter=month(NEW_DATE)%20eq%20" + str(month) + \
                            "%20and%20year(NEW_DATE)%20eq%20" + str(year)
    url_response = requests.get(url=MONTH_YIELD_CURVE_URL)
    LOGGER.info(
        "daily_nominal_yeild_curve.write_response_file(): HTTP RESPONSE STATUS CODE "
        + str(url_response.status_code))
    month_tydotgov_nominal_yields_xml_file = OSMuxImpl.get_proper_path(LOCAL_NOMINAL_YIELD_CURVE_XML_LOC) + \
                                             "DailyTreasuryYieldCurveRateData" + \
                                             date(year, month, 1).isoformat() + ".xml"
    with open(month_tydotgov_nominal_yields_xml_file, 'wb') as f:
        f.write(url_response.content)
    # check empty xml
    entry_count = check_empty_xml(month_tydotgov_nominal_yields_xml_file)
    if entry_count == 0:
        LOGGER.error(
            "nominal_yield_curve:run_daily_insert(): the xml file %s is empty, nothing to insert",
            month_tydotgov_nominal_yields_xml_file)
        return
    fix_invalid_xml(month_tydotgov_nominal_yields_xml_file)
    LOGGER.info(
        "nominal_yield_curve.daily_insert(): ftp'ing into mysql server xml file %s",
        month_tydotgov_nominal_yields_xml_file)
    sftp_xml(month_tydotgov_nominal_yields_xml_file)
    LOGGER.info(
        "nominal_yield_curve.daily_insert(): running parse_xml() on %s ",
        month_tydotgov_nominal_yields_xml_file)
    df_daily = pyparse_xml(month_tydotgov_nominal_yields_xml_file)
    yc_entries = df_daily.apply(vectorized_insert_from_pyparse, axis=1)
    session = get_db_session()
    from sqlalchemy.exc import IntegrityError as SqlAlchemyIntegrityError
    from pymysql.err import IntegrityError as PymysqlIntegrityError
    session.flush()
    for yc_entry in yc_entries.items():
        LOGGER.info("nominal_yield_curve:inserting yield curve entry %s %s",
                    yc_entry[0], yc_entry[1])
        yc_entry_db = session.query(DailyNominalUsTyGovYieldCurve).filter_by(
            Id=yc_entry[1].Id).first()
        if yc_entry_db is None:
            LOGGER.info(
                "nominial_yield_curve.run_month_insert(%s,%s): entries for date %s do not exist, inserting...",
                str(year), str(month), yc_entry[1].NEW_DATE)
            session.add(yc_entry[1])
        else:
            LOGGER.info(
                "nominial_yield_curve.run_month_insert(%s,%s): entries for date %s do exist, updating...",
                str(year), str(month), yc_entry[1].NEW_DATE)
            yc_entry_db.set_all(yc_entry[1])
    session.commit()
Example #10
    def __init__(self, start_date, end_date, symbols):

        super().__init__()
        self.logger = get_logger()

        self.start_date = start_date
        self.end_date = end_date
        self.symbols = symbols

        self.get_px_adj_close = lambda x: web.DataReader(
            x, 'yahoo', start=self.start_date, end=self.end_date)['Adj Close']
        self.get_px_open = lambda x: web.DataReader(
            x, 'yahoo', start=self.start_date, end=self.end_date)['Open']
        self.get_px_high = lambda x: web.DataReader(
            x, 'yahoo', start=self.start_date, end=self.end_date)['High']
        self.get_px_low = lambda x: web.DataReader(
            x, 'yahoo', start=self.start_date, end=self.end_date)['Low']
        self.get_volume = lambda x: web.DataReader(
            x, 'yahoo', start=self.start_date, end=self.end_date)['Volume']

        self.get_px_all = lambda x: web.DataReader(
            x, 'yahoo', start=self.start_date, end=self.end_date)

        self.all_px_df = None
        self.adj_close_px_df = None
        self.open_px_df = None
        self.high_px_df = None
        self.low_px_df = None
        self.volume_df = None

        self.local_yahoo_data_path = '/workspace/data/yahoo/'
        self.local_file_type = '.csv'
        self.local_data_file_pwd = OSMuxImpl.get_proper_path(
            self.local_yahoo_data_path)

        self.local_adj_close_file_name = '_'.join(
            self.symbols) + '_AdjClose' + self.local_file_type
        self.local_open_file_name = '_'.join(
            self.symbols) + '_Open' + self.local_file_type
        self.local_high_file_name = '_'.join(
            self.symbols) + '_High' + self.local_file_type
        self.local_low_file_name = '_'.join(
            self.symbols) + '_Low' + self.local_file_type
        self.local_volume_file_name = '_'.join(
            self.symbols) + '_Volume' + self.local_file_type
        self.local_all_file_name = '_'.join(
            self.symbols) + self.local_file_type

        self.logger.info("YahooDataObject.__init__.local_data_file_pwd: %s",
                         str(self.local_data_file_pwd))

        self.yahoo_client_id = "dj0yJmk9Q09ZdnVWMlNEdzdxJmQ9WVdrOWNFTnNRMFV3TkRRbWNHbzlNQS0tJnM9Y29uc3VtZXJzZWNyZXQmeD1mNA--"
        self.yahoo_client_secret = "41f7939217e13b297ce3862be55c5b9e4b77cab8"
Example #11
    def download_quandl_stock_csv_file(self, idx):

        url = "https://www.quandl.com/api/v3/datasets/EOD/" + idx + ".csv?api_key=" + self.quandl_auth_token
        response = requests.get(url)
        local_data_file_pwd = OSMuxImpl.get_proper_path(
            self.local_stock_data_path)
        total_local_file_name = local_data_file_pwd + idx + ".csv"
        # write out the response to file
        with open(total_local_file_name, 'wb') as f:
            f.write(response.content)
        self.logger.info("QuandlSymbolInterface.download_quandl_stock_csv_file(): HTTP Response Status Code %s " + \
                         str(response.status_code))
Example #12
    def prepare_heritability_case_study_data(self):

        datacamp_dir = OSMuxImpl.get_proper_path(user_provided_path='/workspace/data/datacamp/')
        scandens_csv = datacamp_dir + "scandens_beak_depth_heredity.csv"
        fortis_csv = datacamp_dir + "fortis_beak_depth_heredity.csv"
        df_scandens = pd.read_csv(scandens_csv, sep=",", header=0)
        df_fortis = pd.read_csv(fortis_csv, sep=",", header=0)
        bd_offspring_scandens = df_scandens.mid_offspring.values
        bd_parent_scandens = df_scandens['mid_parent'].values
        bd_offspring_fortis = df_fortis['Mid-offspr'].values
        bd_parent_fortis = (df_fortis['Male BD'].values + df_fortis['Female BD'].values)/2.0
        return (bd_offspring_scandens, bd_parent_scandens, bd_offspring_fortis, bd_parent_fortis)
Example #13
def check_valid_xml(xml_file, xsd_file):
    error_syntax_log = OSMuxImpl.get_proper_path(LOCAL_XML_PARSE_ERROR_LOG)
    with open(xsd_file, 'r') as schema_file:
        schema_to_check = schema_file.read()
    # open and read xml file
    with open(xml_file, 'r') as xml_f:
        xml_to_check = xml_f.read()
    xmlschema_doc = etree.parse(StringIO(schema_to_check))
    xml_schema = etree.XMLSchema(xmlschema_doc)
    # parse xml
    try:
        doc = etree.parse(StringIO(xml_to_check))
        LOGGER.info(
            "nominal_yield_curve.check_valid_xml(): all is well so far, parsed successfully"
            " xml file %s", xml_file)
    # check for file IO error
    except IOError as io_error:
        LOGGER.error(
            "nominal_yield_curve.check_valid_xml(): invalid file, exception is %s",
            str(io_error))
    # check for XML syntax errors
    except etree.XMLSyntaxError as err:
        LOGGER.error(
            "nominal_yield_curve.check_xml_valid(): schema validation error %s, see error_schema.log!",
            str(err))
        with open(error_syntax_log, 'w') as error_log_file:
            error_log_file.write(str(err.error_log))
        quit()
    except:
        LOGGER.error(
            "nominal_yield_curve.check_xml_valid(): unknown error, quitting..."
        )
        quit()
    # validate against schema
    try:
        xml_schema.assertValid(doc)
        LOGGER.info(
            "nominal_yield_curve.check_xml_valid(): XML Valid, schema validation ok %s %s",
            xsd_file, xml_file)
    except etree.DocumentInvalid as err:
        LOGGER.error(
            "nominal_yield_curve.check_xml_valid(): schema validation error %s %s, see error_schema.log!",
            xsd_file, xml_file)
        with open(error_syntax_log, 'w') as error_log_file:
            error_log_file.write(str(err.error_log))
        quit()
    except:
        LOGGER.error(
            "nominal_yield_curve.check_xml_valid(): unknown error, quitting..."
        )
        quit()
Example #14
    def __init__(self, tickers, writer_filename=None):

        self.tickers = tickers
        self.writer_filename = writer_filename
        self.logger = get_logger()
        self.source = 'simfin'
        self.api_key = SecureKeysAccess.get_vendor_api_key_static(
            vendor=str.upper(self.source))
        self.simfin_pwd = OSMuxImpl.get_proper_path('/workspace/data/simfin/')
        if writer_filename is not None:
            self.writer = self.get_writer(self.simfin_pwd + writer_filename)
        else:
            self.writer = None
        self.sim_ids = self.get_sim_ids(tickers)
Example #15
def validate_show_document(html_document,
                           html_filename,
                           html_dir,
                           viewHtml=False):
    html_document.validate()
    proper_dir = OSMuxImpl.get_proper_path(html_dir)
    proper_filename = proper_dir + html_filename
    with open(proper_filename, "w", encoding='utf-8') as f:
        f.write(file_html(html_document, INLINE, "Data Tables"))
    LOGGER.info(
        "extend_bokeh_datatables.ExtendBokeh.validate_show_document(): wrote %s in dir %s ",
        html_filename, proper_dir)
    if viewHtml is not False:
        view(proper_filename)
Example #16
    def prepare_finch_beak_case_study_data(self):

        """Specific function to prepare the finch beak case study data. Simply read
           in the data from csv into Panda's dataframe, and then return individual columns
           of data as numpy arrays.
        """
        datacamp_dir = OSMuxImpl.get_proper_path(user_provided_path='/workspace/data/datacamp/')
        finch_beak_1975 = datacamp_dir + "finch_beaks_1975.csv"
        finch_beak_2012 = datacamp_dir + "finch_beaks_2012.csv"
        df_1975 = pd.read_csv(finch_beak_1975, sep=',', header=0)
        df_2012 = pd.read_csv(finch_beak_2012, sep=',', header=0)
        bd_1975 = df_1975.bdepth.values
        bl_1975 = df_1975.blength.values
        bd_2012 = df_2012['Beak depth, mm'].values
        bl_2012 = df_2012['Beak length, mm'].values

        return (bd_1975, bd_2012, bl_1975, bl_2012)
Example #17
def validate_show_document(html_document,
                           html_filename,
                           html_dir,
                           viewHtml=False):
    from bokeh.embed import file_html
    from bokeh.resources import INLINE
    from bokeh.util.browser import view
    print("the html document", file_html(html_document, INLINE,
                                         "CorrelTables"))
    html_document.validate()
    proper_dir = OSMuxImpl.get_proper_path(html_dir)
    proper_filename = proper_dir + html_filename
    with open(proper_filename, "w") as f:
        f.write(file_html(html_document, INLINE, "Data Tables"))
    LOGGER.info(
        "extend_bokeh_datatables.ExtendBokeh.validate_show_document(): wrote %s in dir %s ",
        html_filename, proper_dir)
    if viewHtml is not False:
        view(proper_filename)
Example #18
def screenscrape_daily_volvals():
    return_dict = {}
    as_of_date = None
    session = get_db_session()
    for vol_prod in cboe_dashboard_urls_dict.keys():
        html_text = requests.get(cboe_dashboard_urls_dict[vol_prod]).text
        b_soup = BeautifulSoup(html_text, 'html.parser')
        attrs = {
            'id': 'div-summary'
        }
        keys_list = []
        values_list = []
        for div_elem in b_soup.find_all('div', attrs=attrs):
            for sub_elem in div_elem.find_all('h5'):
                keys_list.append(sub_elem.text)
            for sub_elem in div_elem.find_all('span'):
                values_list.append(sub_elem.text)
        as_of_date = values_list[-1]
        as_of_date = as_of_date.split("As of ")[1]
        vol_prod_dict = dict(zip(keys_list, values_list))
        vol_prod_dict['AsOfDate'] = re.sub(r'\s+', ' ', as_of_date).strip()
        as_of_date = vol_prod_dict['AsOfDate']
        as_of_date_dt = datetime.strptime(as_of_date, '%Y-%m-%d %H:%M:%S (ET)')
        as_of_date_date = as_of_date_dt.date()
        vts = VixTermStructure(Id=as_of_date_date,
                               Symbol=vol_prod,
                               Change=vol_prod_dict['Change'],
                               Open=float(vol_prod_dict['Open']),
                               High=float(vol_prod_dict['High']),
                               Low=float(vol_prod_dict['Low']),
                               Close=float(vol_prod_dict['Prev Close']),
                               LastSale=float(vol_prod_dict['Last Sale']),
                               LastTime=as_of_date_dt)
        vts_to_db(vts, session)
        return_dict[vol_prod] = vol_prod_dict
    df = pd.DataFrame(data=return_dict)
    daily_vals_csv_file = OSMuxImpl.get_proper_path(LOCAL_CBOE_DATA_DIR) + "daily_vol_vals_" + str(as_of_date) + ".csv"
    as_of_date_dt = datetime.strptime(as_of_date, '%Y-%m-%d %H:%M:%S (ET)')
    as_of_date_dt = as_of_date_dt.replace(tzinfo=timezone("EST"))
    df.to_csv(daily_vals_csv_file)
    return df
Example #19
    def __init__(self, class_of_data, local_symbol, local_file_type):

        # class of data
        ################################
        # 'FOREX'
        # 'EURODOLLARS'
        # 'INTEREST_RATES'
        # 'ECONOMIC_INDICATORS_UNADJ'
        # 'ECONOMIC_INDICATORS_SEAS_ADJ'
        # 'FED_FORECASTS'
        # 'MISC'
        # 'STOCKS'

        # local_symbol
        ################################
        # See the static dictionaries in quandl_interface.py.
        # The local symbol is the key in the key,value pairs that make up those non-reversed dictionaries.

        # local_file_type
        ################################
        # usually .csv

        super().__init__()
        self.qsi = QuandlSymbolInterface()
        self.class_of_data = class_of_data
        self.local_symbol = local_symbol
        self.local_file_type = local_file_type
        self.quandl_symbol = self.qsi.get_quandl_symbol(
            class_of_data, local_symbol)

        self.local_data_file_pwd = OSMuxImpl.get_proper_path(
            self.qsi.get_local_quandl_data_path(class_of_data))
        self.logger.info("QuandlDataObject.__init__.local_data_file_pwd " +
                         self.local_data_file_pwd)
        self.local_data_file_name = local_symbol + self.local_file_type
        self.newest_date_at_quandl_dt = None
        self.last_time_file_modified = None
        if self.does_local_file_exist() and not self.is_local_file_old():
            self.df = self.get_df_from_local_csv()
        else:
            self.df = self.get_from_quandl()
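A minimal usage sketch of this constructor, mirroring the parameter notes above and the call in Example #28 below (assumes this QuandlDataObject class and its get_df() accessor):

qdo = QuandlDataObject('EURODOLLARS', 'ED4_WHITE', '.csv')
ed_df = qdo.get_df()  # DataFrame loaded from the local csv, or freshly pulled from Quandl if stale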
Example #20
def batch_historical_get(no_ftp=False):
    # connect to treasury.gov xml feed.
    all_hist_tydotgov_nominal_yields_xml_url = "https://data.treasury.gov/feed.svc/DailyTreasuryYieldCurveRateData"
    url_response = requests.get(url=all_hist_tydotgov_nominal_yields_xml_url)
    hist_tydotgov_nominal_yields_xml_file = OSMuxImpl.get_proper_path(LOCAL_NOMINAL_YIELD_CURVE_XML_LOC) + \
                                            "DailyTreasuryYieldCurveRateData-Historical.xml"
    # write out the response to file
    with open(hist_tydotgov_nominal_yields_xml_file, 'wb') as f:
        f.write(url_response.content)
        LOGGER.info(
            "historical_nominal_yield_curve.write_response_file(): HTTP Response Status Code "
            + str(url_response.status_code))
    entry_count = check_empty_xml(hist_tydotgov_nominal_yields_xml_file)
    if entry_count == 0:
        LOGGER.error(
            "nominal_yield_curve.batch_historical_get(): historical xml file is empty %s ",
            hist_tydotgov_nominal_yields_xml_file)
        no_ftp = True
    if not no_ftp:
        sftp_xml(hist_tydotgov_nominal_yields_xml_file)
    return hist_tydotgov_nominal_yields_xml_file
Example #21
def get_historical_csv(key=None, url=None):
    if url is None:
        url = "https://www.cboe.com/chart/GetDownloadData/"
        payload = {'RequestSymbol': 'VIX1Y'}
    else:
        payload = None
    url_response = requests.get(url=url, params=payload)
    if url_response.status_code != 200:
        LOGGER.error("cboe.get_historical_csv(): HTTP RESPONSE STATUS CODE %s", str(url_response.status_code))
        LOGGER.error("cboe.get_historical_csv(): url request failed %s", url)
        return url_response.status_code
    attachmentFilename = url_response.headers['Content-Disposition']
    csv_filename = attachmentFilename.split('=')[1].replace('"','')
    LOGGER.info("cboe.get_historical_csv(): HTTP RESPONSE STATUS CODE " +
                str(url_response.status_code))
    historical_csv_file = OSMuxImpl.get_proper_path(LOCAL_CBOE_DATA_DIR) + csv_filename
    with open(historical_csv_file, 'wb') as f:
        for chunk in url_response.iter_content(chunk_size=128):
            f.write(chunk)
    return url_response.status_code
Example #22
    def __init__(self, **kwargs):

        self.logger = get_logger()
        self.file_mod_time_dict = {}
        self.symbols = None
        self.start_date = ""
        self.end_date = ""
        self.source = "tiingo"
        self.api_key = SecureKeysAccess.get_vendor_api_key_static(
            vendor=str.upper(self.source))

        for key, value in kwargs.items():

            if key == 'symbols':
                self.symbols = value
                self.logger.info("TiingoDataObject.__init__.symbols: %s",
                                 str(self.symbols))
            elif key == 'start_date':
                self.start_date = value
                self.logger.info("TiingoDataObject.__init__.start_date: %s",
                                 str(self.start_date))
            elif key == 'end_date':
                self.end_date = value
                self.logger.info("TiingoDataObject.__init__.end_date: %s",
                                 str(self.end_date))
            elif key == 'source':
                self.source = value
                self.logger.info("TiingoDataObject.__init__.source: %s",
                                 str(self.source))

        self.get_px_all_tiingo = lambda x: web.get_data_tiingo(
            x,
            start='2010-01-01',
            end=str(pd.to_datetime('today')).split(' ')[0],
            api_key=self.api_key)
        self.all_px_df = None
        self.local_tiingo_data_path = '/workspace/data/tiingo/stocks/'
        self.local_file_type = '.csv'
        self.local_data_file_pwd = OSMuxImpl.get_proper_path(
            self.local_tiingo_data_path)
Example #23
def get_historical_vx_data_from_cboe(contract_expiry_date):
    from os import path
    total_url = CBOE_FUTURES_BASE_URL + VX_CSV_REMOTE_DIR + str(contract_expiry_date)
    file_prefix = "CFE_"
    file_suffix = "_VX.csv"
    keys_list = list(MONTHLY_EXPIRY_MONTH_CODE_MAPPING.keys())
    values_list = list(MONTHLY_EXPIRY_MONTH_CODE_MAPPING.values())
    year = datetime.strftime(contract_expiry_date, "%y")
    month_code = keys_list[values_list.index(contract_expiry_date.month)]
    full_front_filename = file_prefix + month_code + str(year) + file_suffix
    path_to_front_month_file = OSMuxImpl.get_proper_path(LOCAL_CBOE_DATA_DIR) + full_front_filename
    if contract_expiry_date<datetime.now().date() and path.exists(path_to_front_month_file):
        # this contract is expired already. We may have it in the flat file system.
        return 200
    LOGGER.info("cboe.get_historical_vx_data_from_cboe(%s): getting historical vx data from url %s...",
                contract_expiry_date, total_url)
    return_value = get_historical_csv(url=total_url)
    if return_value == 404 and contract_expiry_date > \
            (datetime.now().date() + timedelta(days=180)):
        LOGGER.info("cboe.get_historical_csv(): 404 Page not found, most likely this is ok...")
    elif return_value == 404:
        LOGGER.error("cboe.get_historical_csv(): 404 Page not found, there is something WRONG! %s", total_url)
    return return_value
Example #24
def run_daily_insert():
    TODAY_DATE = datetime.now()
    DAILY_YIELD_CURVE_URL = "https://data.treasury.gov/feed.svc/DailyTreasuryYieldCurveRateData?" \
                            "$filter=day(NEW_DATE)%20eq%20" + str(TODAY_DATE.day) + \
                            "%20and%20month(NEW_DATE)%20eq%20" + str(TODAY_DATE.month) + \
                            "%20and%20year(NEW_DATE)%20eq%20" + str(TODAY_DATE.year)
    url_response = requests.get(url=DAILY_YIELD_CURVE_URL)
    LOGGER.info(
        "daily_nominal_yeild_curve.write_response_file(): HTTP RESPONSE STATUS CODE "
        + str(url_response.status_code))
    daily_tydotgov_nominal_yields_xml_file = OSMuxImpl.get_proper_path(LOCAL_NOMINAL_YIELD_CURVE_XML_LOC) + \
                                             "DailyTreasuryYieldCurveRateDataToday.xml"
    with open(daily_tydotgov_nominal_yields_xml_file, 'wb') as f:
        f.write(url_response.content)
    # check empty xml
    entry_count = check_empty_xml(daily_tydotgov_nominal_yields_xml_file)
    if entry_count == 0:
        LOGGER.error(
            "nominal_yield_curve:run_daily_insert(): the xml file %s is empty, nothing to insert",
            daily_tydotgov_nominal_yields_xml_file)
        return
    fix_invalid_xml(daily_tydotgov_nominal_yields_xml_file)
    LOGGER.info(
        "nominal_yield_curve.daily_insert(): ftp'ing into mysql server xml file %s",
        daily_tydotgov_nominal_yields_xml_file)
    sftp_xml(daily_tydotgov_nominal_yields_xml_file)
    LOGGER.info(
        "nominal_yield_curve.daily_insert(): running parse_xml() on %s ",
        daily_tydotgov_nominal_yields_xml_file)
    df_daily = pyparse_xml(daily_tydotgov_nominal_yields_xml_file)
    yc_entries = df_daily.apply(vectorized_insert_from_pyparse, axis=1)
    session = get_db_session()
    for yc_entry in yc_entries.items():
        LOGGER.info("nominal_yield_curve:inserting yield curve entry %s %s",
                    yc_entry[0], yc_entry[1])
        session.add(yc_entry[1])
    session.commit()
Example #25
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rcParams
import numpy as np
from root.nested import get_logger

sns.set()
rcParams.update({'figure.autolayout': True})
LOGGER = get_logger()
YIELD_DIFF_RETS_PERIODS = [1, 2, 5, 10, 20, 40, 60, 120, 252]
PAR_BOND_PRICE = 1000.0
TRACK_INDEX_NOMINAL_POINT_YIELD_SPREAD_CSV = "workspace/data/treasurygov/analysis/"
HTML_FILES_DIR = "workspace/data/bokeh/html/"
SEABORN_PNG_FILES_DIR = "/workspace/data/seaborn/png/"
TRACK_INDEX_NOMINAL_POINT_YIELD_SPREAD_CSV = \
    OSMuxImpl.get_proper_path(TRACK_INDEX_NOMINAL_POINT_YIELD_SPREAD_CSV) + "tpyls.csv"
""" Get bond price from YTM """


def bond_price(par, T, ytm, coup, freq=2):
    """
    BROKEN FUNCTION, DO NOT USE!
    :param par:
    :param T:
    :param ytm:
    :param coup:
    :param freq:
    :return:
    """
    freq = float(freq)
    periods = T * freq
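The bond_price function above is truncated and flagged as broken; for reference, here is a minimal working sketch of the standard present-value pricing formula under the same signature (a generic textbook formula, not the author's implementation):

def bond_price_sketch(par, T, ytm, coup, freq=2):
    # price = PV of the periodic coupons plus PV of par repaid at maturity T (in years)
    freq = float(freq)
    periods = int(T * freq)
    coupon = coup / 100.0 * par / freq      # cash coupon paid each period
    discount = 1.0 + ytm / freq             # per-period discount factor
    pv_coupons = sum(coupon / discount ** t for t in range(1, periods + 1))
    pv_par = par / discount ** periods
    return pv_coupons + pv_par

# e.g. bond_price_sketch(PAR_BOND_PRICE, 10, 0.05, 5) prices at ~1000.0 (par) when coupon rate equals ytm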
Example #26
def correlation_heatmaps(df, duration_point, risk_asset_list):
    LOGGER.info(
        "yield_curve_risk_pricer.make_spread(): running make_spread() function..."
    )
    col_list = df.columns
    risk_off_diffs_list = []
    risk_off_pct_change_s_list = []
    risk_off_derived_px_diffs_list = []
    risk_off_derived_px_pctChange_list = []
    for dp in duration_point:
        label = nominal_yield_curve.DURATION_TO_DBCOL_MAPPING[dp]
        for ydr_per in YIELD_DIFF_RETS_PERIODS:
            risk_off_diffs = label + '_' + str(ydr_per) + 'D_DIFF'
            risk_off_pct_change_s = label + '_' + str(ydr_per) + 'D_PCT_CHANGE'
            risk_off_derived_px_diffs = label + '_UPDATED_PRICE_' + str(
                ydr_per) + 'D_DIFF'
            risk_off_derived_px_pctChange = label + '_UPDATED_PRICE_' + str(
                ydr_per) + 'D_PCT_CHANGE'
            risk_off_diffs_list.append(risk_off_diffs)
            risk_off_pct_change_s_list.append(risk_off_pct_change_s)
            risk_off_derived_px_diffs_list.append(risk_off_derived_px_diffs)
            risk_off_derived_px_pctChange_list.append(
                risk_off_derived_px_pctChange)
        diffs_intersection_of_lists = \
            list(sorted(set(risk_off_diffs_list).intersection(col_list)))
        pct_change_s_intersection_of_lists = \
            list(sorted(set(risk_off_pct_change_s_list).intersection(col_list)))
        derived_px_diffs_intersection_of_lists = \
            list(sorted(set(risk_off_derived_px_diffs_list).intersection(col_list)))
        derived_px_pctChange_intersection_of_lists = \
            list(sorted(set(risk_off_derived_px_pctChange_list).intersection(col_list)))
        for risk_on in risk_asset_list:
            risk_on_diffs_list = []
            risk_on_pct_change_s_list = []
            for ydr_per in YIELD_DIFF_RETS_PERIODS:
                risk_on_diffs = risk_on + '_' + str(ydr_per) + 'D_DIFF'
                risk_on_pct_change_s = risk_on + '_' + str(
                    ydr_per) + 'D_PCT_CHANGE'
                risk_on_diffs_list.append(risk_on_diffs)
                risk_on_pct_change_s_list.append(risk_on_pct_change_s)
            diffs_intersection_risk_on_list = list(
                sorted(set(risk_on_diffs_list).intersection(col_list)))
            pct_change_s_intersection_risk_on_list = list(
                sorted(set(risk_on_pct_change_s_list).intersection(col_list)))
            corr_matrix_diffRiskOff_diffRiskOn = \
                df[diffs_intersection_of_lists + diffs_intersection_risk_on_list].corr()
            corr_matrix_diffRiskOff_pctChangeRiskOn = \
                df[diffs_intersection_of_lists + pct_change_s_intersection_risk_on_list].corr()
            corr_matrix_pctChangeRiskOff_diffRiskOn = \
                df[pct_change_s_intersection_of_lists + diffs_intersection_risk_on_list].corr()
            corr_matrix_pctChangeRiskOff_pctChangeRiskOn = \
                df[pct_change_s_intersection_of_lists + pct_change_s_intersection_risk_on_list].corr()

            # derived px diff/pct_change versus risk-on diff/pctChange
            corr_matrix_derivedPxDiffRiskOff_diffRiskOn = \
                df[derived_px_diffs_intersection_of_lists + diffs_intersection_risk_on_list].corr()
            corr_matrix_derivedPxDiffRiskOff_pctChangeRiskOn = \
                df[derived_px_diffs_intersection_of_lists + pct_change_s_intersection_risk_on_list].corr()
            corr_matrix_derivedPxPctChangeRiskOff_diffRiskOn = \
                df[derived_px_pctChange_intersection_of_lists + diffs_intersection_risk_on_list].corr()
            corr_matrix_derivedPxPctChangeRiskOff_pctChangeRiskOn = \
                df[derived_px_pctChange_intersection_of_lists + pct_change_s_intersection_risk_on_list].corr()

            plt.figure(figsize=(27, 18))
            LOGGER.info(
                "yield_curve_risk_pricer.correlation_heatmap(): "
                "corr_matrix_derivedPxPctChangeRiskOff_pctChangeRiskOn")
            ax = sns.heatmap(
                corr_matrix_derivedPxPctChangeRiskOff_pctChangeRiskOn,
                annot=True,
                linewidths=0.5)
            ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=12)
            ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=12)
            ax.figure.savefig(
                OSMuxImpl.get_proper_path(SEABORN_PNG_FILES_DIR) +
                "corr_matrix_derivedPxPctChangeRiskOff_pctChangeRiskOn.png")
            # plt.tight_layout()
            top_correl_df = find_top_correlations(
                corr_matrix_derivedPxPctChangeRiskOff_pctChangeRiskOn)
            print(top_correl_df)
            make_spread_price(top_correl_df, df)
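            # NOTE: early return for now; the remaining heatmap/correlation blocks below are currently unreachable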
            return

            plt.figure(figsize=(27, 18))
            LOGGER.info(
                "yield_curve_risk_pricer.correlation_heatmap(): corr_matrix_diffRiskOff_diffRiskOn"
            )
            ax = sns.heatmap(corr_matrix_diffRiskOff_diffRiskOn,
                             annot=True,
                             linewidths=0.5)
            ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=12)
            ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=12)
            ax.figure.savefig(
                OSMuxImpl.get_proper_path(SEABORN_PNG_FILES_DIR) +
                "corr_matrix_diffRiskOff_diffRiskOn.png")
            top_correl_df = find_top_correlations(
                corr_matrix_diffRiskOff_diffRiskOn)
            make_spread_price(top_correl_df, df)

            plt.figure(figsize=(27, 18))
            LOGGER.info(
                "yield_curve_risk_pricer.correlation_heatmap(): corr_matrix_diffRiskOff_pctChangeRiskOn"
            )
            ax = sns.heatmap(corr_matrix_diffRiskOff_pctChangeRiskOn,
                             annot=True,
                             linewidths=0.5)
            ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=12)
            ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=12)
            # plt.tight_layout()
            ax.figure.savefig(
                OSMuxImpl.get_proper_path(SEABORN_PNG_FILES_DIR) +
                "corr_matrix_diffRiskOff_pctChangeRiskOn.png")
            find_top_correlations(corr_matrix_diffRiskOff_pctChangeRiskOn)

            plt.figure(figsize=(27, 18))
            LOGGER.info(
                "yield_curve_risk_pricer.correlation_heatmap(): corr_matrix_pctChangeRiskOff_diffRiskOn"
            )
            ax = sns.heatmap(corr_matrix_pctChangeRiskOff_diffRiskOn,
                             annot=True,
                             linewidths=0.5)
            ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=12)
            ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=12)
            # plt.tight_layout()
            ax.figure.savefig(
                OSMuxImpl.get_proper_path(SEABORN_PNG_FILES_DIR) +
                "corr_matrix_pctChangeRiskOff_diffRiskOn.png")
            find_top_correlations(corr_matrix_pctChangeRiskOff_diffRiskOn)

            plt.figure(figsize=(27, 18))
            LOGGER.info(
                "yield_curve_risk_pricer.correlation_heatmap(): corr_matrix_pctChangeRiskOff_pctChangeRiskOn"
            )
            ax = sns.heatmap(corr_matrix_pctChangeRiskOff_pctChangeRiskOn,
                             annot=True,
                             linewidths=0.5)
            ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=12)
            ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=12)
            # plt.tight_layout()
            ax.figure.savefig(
                OSMuxImpl.get_proper_path(SEABORN_PNG_FILES_DIR) +
                "corr_matrix_pctChangeRiskOff_pctChangeRiskOn.png")
            find_top_correlations(corr_matrix_pctChangeRiskOff_pctChangeRiskOn)

            plt.figure(figsize=(27, 18))
            LOGGER.info(
                "yield_curve_risk_pricer.correlation_heatmap(): corr_matrix_derivedPxDiffRiskOff_diffRiskOn"
            )
            ax = sns.heatmap(corr_matrix_derivedPxDiffRiskOff_diffRiskOn,
                             annot=True,
                             linewidths=0.5)
            ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=12)
            ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=12)
            ax.figure.savefig(
                OSMuxImpl.get_proper_path(SEABORN_PNG_FILES_DIR) +
                "corr_matrix_derivedPxDiffRiskOff_diffRiskOn.png")
            # plt.tight_layout()
            find_top_correlations(corr_matrix_derivedPxDiffRiskOff_diffRiskOn)

            plt.figure(figsize=(27, 18))
            LOGGER.info("yield_curve_risk_pricer.correlation_heatmap(): "
                        "corr_matrix_derivedPxDiffRiskOff_pctChangeRiskOn")
            ax = sns.heatmap(corr_matrix_derivedPxDiffRiskOff_pctChangeRiskOn,
                             annot=True,
                             linewidths=0.5)
            ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=12)
            ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=12)
            ax.figure.savefig(
                OSMuxImpl.get_proper_path(SEABORN_PNG_FILES_DIR) +
                "corr_matrix_derivedPxDiffRiskOff_pctChangeRiskOn"
                ".png")
            # plt.tight_layout()
            find_top_correlations(
                corr_matrix_derivedPxDiffRiskOff_pctChangeRiskOn)

            plt.figure(figsize=(27, 18))
            LOGGER.info("yield_curve_risk_pricer.correlation_heatmap(): "
                        "corr_matrix_derivedPxPctChangeRiskOff_diffRiskOn")
            ax = sns.heatmap(corr_matrix_derivedPxPctChangeRiskOff_diffRiskOn,
                             annot=True,
                             linewidths=0.5)
            ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=12)
            ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=12)
            ax.figure.savefig(
                OSMuxImpl.get_proper_path(SEABORN_PNG_FILES_DIR) +
                "corr_matrix_derivedPxChangeRiskOff_diffRiskOn.png")
            # plt.tight_layout()
            find_top_correlations(
                corr_matrix_derivedPxPctChangeRiskOff_diffRiskOn)
Example #27
    def __init__(self):

        self.logger = get_logger()

        local_misc_data_path = '/workspace/data/'
        self.local_stock_universe_file_pwd = OSMuxImpl.get_proper_path(
            local_misc_data_path)

        stock_universe_file_name = 'IWB_holdings'
        self.stock_universe_file_type = '.csv'
        self.total_pwd_stock_universe_file = self.local_stock_universe_file_pwd + \
                                             stock_universe_file_name + self.stock_universe_file_type
        try:
            last_time_stock_universe_file_modified = os.path.getmtime(
                self.total_pwd_stock_universe_file)
        except FileNotFoundError:
            last_time_stock_universe_file_modified = ""

        russell_1000_stock_universe = 'Russ1K_holdings'
        self.total_pwd_russell1000 = self.local_stock_universe_file_pwd + \
                                     russell_1000_stock_universe + self.stock_universe_file_type
        try:
            last_time_russell_1000_stock_universe_file_modified = os.path.getmtime(
                self.total_pwd_russell1000)
        except FileNotFoundError:
            last_time_russell_1000_stock_universe_file_modified = ""

        russell_3000_stock_universe = 'Russ3K_holdings'
        self.total_pwd_russell3000 = self.local_stock_universe_file_pwd + \
                                     russell_3000_stock_universe + self.stock_universe_file_type
        try:
            last_time_russell_3000_stock_universe_file_modified = os.path.getmtime(
                self.total_pwd_russell3000)
        except FileNotFoundError:
            last_time_russell_3000_stock_universe_file_modified = ""

        nq_100_stock_universe = 'NQ100'
        self.total_pwd_nq100 = self.local_stock_universe_file_pwd + \
                               nq_100_stock_universe + self.stock_universe_file_type
        try:
            last_time_nq_100_stock_universe_file_modified = os.path.getmtime(
                self.total_pwd_nq100)
        except FileNotFoundError:
            last_time_nq_100_stock_universe_file_modified = ""

        sp_500_stock_universe = 'SP500'
        self.total_pwd_sp500 = self.local_stock_universe_file_pwd + \
                               sp_500_stock_universe + self.stock_universe_file_type
        try:
            last_time_sp_500_stock_universe_file_modified = os.path.getmtime(
                self.total_pwd_sp500)
        except FileNotFoundError:
            last_time_sp_500_stock_universe_file_modified = ""

        dow_30_stock_universe = 'DOW30'
        self.total_pwd_dow30 = self.local_stock_universe_file_pwd + \
                               dow_30_stock_universe + self.stock_universe_file_type
        try:
            last_time_dow_30_stock_universe_file_modified = os.path.getmtime(
                self.total_pwd_dow30)
        except FileNotFoundError:
            last_time_dow_30_stock_universe_file_modified = ""

        russ_2k_stock_universe = 'R2K'
        self.total_pwd_r2k = self.local_stock_universe_file_pwd + \
                             russ_2k_stock_universe + self.stock_universe_file_type
        try:
            last_time_r2k_stock_universe_file_modified = os.path.getmtime(
                self.total_pwd_r2k)
        except FileNotFoundError:
            last_time_r2k_stock_universe_file_modified = ""

        self.last_modified_times = {
            stock_universe_file_name: last_time_stock_universe_file_modified,
            russell_3000_stock_universe:
            last_time_russell_3000_stock_universe_file_modified,
            russell_1000_stock_universe:
            last_time_russell_1000_stock_universe_file_modified,
            russ_2k_stock_universe: last_time_r2k_stock_universe_file_modified,
            nq_100_stock_universe:
            last_time_nq_100_stock_universe_file_modified,
            sp_500_stock_universe:
            last_time_sp_500_stock_universe_file_modified,
            dow_30_stock_universe:
            last_time_dow_30_stock_universe_file_modified
        }

        self.stock_universe_download_func = {
            stock_universe_file_name: self.download_stock_universe,
            russell_1000_stock_universe:
            self.download_russell_1000_stock_universe,
            russell_3000_stock_universe:
            self.download_russell_3000_stock_universe,
            russ_2k_stock_universe: self.download_r2k_holdings_symbol_list,
            nq_100_stock_universe: self.download_nq100_holdings_symbol_list,
            sp_500_stock_universe: self.download_sp500_holdings_symbol_list,
            dow_30_stock_universe: self.download_dow30_holdings_symbol_list
        }
Example #28
    def rolling_eurodollar_os_sl_corr(
            self,
            ir_class="EURODOLLARS",
            contract='ED4_WHITE',
            observation_start='2014-06-01',
            observation_end=pd.Timestamp.now().strftime('%Y-%m-%d'),
            which_lag=1,
            rolling_window_size=60,
            rolling_pnl_window_size=90,
            execution_slippage=-0.0025,
            min_input_vol=0.0,
            pos_correl_filter_val=0.2,
            neg_correl_filter_val=-0.2):

        # default window size is one week,there are two observations per day.
        qdo_eurodollar = QuandlDataObject(ir_class, contract, '.csv')
        ed_df = qdo_eurodollar.get_df()
        ed_df = ed_df[observation_start:observation_end]
        # if nothing traded on a certain day, just drop the row - e.g. Dec 5th, 2018 - market
        # closed for the funeral of George Bush the first.
        ed_df = ed_df[ed_df.Volume > 0.0]
        ed_df['OpenSettleDelta'] = ed_df.Settle - ed_df.Open
        ed_df['OpenLastDelta'] = ed_df.Last - ed_df.Open
        ed_df['SettleLastDelta'] = ed_df.Last - ed_df.Settle
        ed_df['SettleNextOpenDelta'] = ed_df.Open.shift(
            periods=-which_lag) - ed_df.Settle
        ed_df['LastNextOpenDelta'] = ed_df.Open.shift(
            periods=-which_lag) - ed_df.Last
        conditions = [
            (pd.to_numeric(ed_df.OpenSettleDelta.mul(1000000.0),
                           downcast='integer') > int(
                               min_input_vol * 1000000.0)),  # one tick = 5000
            (pd.to_numeric(ed_df.OpenSettleDelta.mul(1000000.0),
                           downcast='integer') < int(
                               -min_input_vol * 1000000.0))
        ]  # one tick = 5000
        ol_delta_conditions = [
            (pd.to_numeric(ed_df.OpenLastDelta.mul(1000000.0),
                           downcast='integer') > int(
                               min_input_vol * 1000000.0)),  # one tick = 5000
            (pd.to_numeric(ed_df.OpenLastDelta.mul(1000000.0),
                           downcast='integer') < int(
                               -min_input_vol * 1000000.0))
        ]  # one tick = 5000
        ## the below, 1.0 or -1.0 multiples, tells us whether we expect reversion in next period,
        ## or autocorrelation. (-1.0,1.0) = reversion, (1.0, -1.0) = autocorrelation
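        ## note: as written, the choices_* lists below use the (1.0, -1.0) configuration, i.e. the autocorrelation bet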
        choices_settle_last = [
            ed_df.SettleLastDelta.mul(1.0),
            ed_df.SettleLastDelta.mul(-1.0)
        ]
        choices_settle_nextopen = [
            ed_df.SettleNextOpenDelta.mul(1.0),
            ed_df.SettleNextOpenDelta.mul(-1.0)
        ]
        choices_last_nextopen = [
            ed_df.LastNextOpenDelta.mul(1.0),
            ed_df.LastNextOpenDelta.mul(-1.0)
        ]
        ed_df['SettleLastTradeSelect'] = np.select(conditions,
                                                   choices_settle_last,
                                                   default=0.0)
        ed_df['SettleNextOpenTradeSelect'] = np.select(conditions,
                                                       choices_settle_nextopen,
                                                       default=0.0)
        ed_df['LastNextOpenTradeSelect'] = np.select(ol_delta_conditions,
                                                     choices_last_nextopen,
                                                     default=0.0)

        ed_df['os_sl_corr_series'] = ed_df.OpenSettleDelta.rolling(
            rolling_window_size).corr(ed_df.SettleLastDelta)
        ed_df['os_snxto_corr_series'] = ed_df.OpenSettleDelta.rolling(
            rolling_window_size).corr(ed_df.SettleNextOpenDelta)
        ed_df['ol_lnxto_corr_series'] = ed_df.OpenLastDelta.rolling(
            rolling_window_size).corr(ed_df.LastNextOpenDelta)

        ed_df['rolling_reversion_trade_pnl'] = ed_df.SettleLastTradeSelect.rolling(rolling_pnl_window_size).\
            sum().div(0.005)
        ed_df['fwd_looking_rolling_reversion_trade_pnl'] = ed_df.rolling_reversion_trade_pnl.\
            shift(-1*rolling_pnl_window_size+1)

        ed_df['rolling_reversion_settleNextOpen_trade_pnl'] = ed_df.SettleNextOpenTradeSelect.\
            rolling(rolling_pnl_window_size).sum().div(0.005)
        ed_df['fwd_looking_rolling_reversion_settleNextOpen_trade_pnl'] = ed_df.rolling_reversion_settleNextOpen_trade_pnl.\
            shift(-1*rolling_pnl_window_size+1)

        ed_df['rolling_reversion_lastNextOpen_trade_pnl'] = ed_df.LastNextOpenTradeSelect.\
            rolling(rolling_pnl_window_size).sum().div(0.005)
        ed_df['fwd_looking_rolling_reversion_lastNextOpen_trade_pnl'] = ed_df.rolling_reversion_lastNextOpen_trade_pnl.\
            shift(-1*rolling_pnl_window_size+1)

        ed_df['os_sl_lagged_corr_series'] = ed_df.os_sl_corr_series.shift(
            periods=1)
        ed_df[
            'os_snxto_lagged_corr_series'] = ed_df.os_snxto_corr_series.shift(
                periods=1)
        ed_df[
            'ol_lnxto_lagged_corr_series'] = ed_df.ol_lnxto_corr_series.shift(
                periods=1)
        # create filter conditions for correlation
        correl_filter = {}
        correl_filter['os_sl'] = [(operator.gt, pos_correl_filter_val),
                                  (operator.lt, neg_correl_filter_val),
                                  operator.or_]
        correl_filter['os_snxto'] = [(operator.gt, pos_correl_filter_val),
                                     (operator.lt, neg_correl_filter_val),
                                     operator.or_]
        correl_filter['ol_lnxto'] = [(operator.gt, pos_correl_filter_val),
                                     (operator.lt, neg_correl_filter_val),
                                     operator.or_]
        pos_ind_series = ed_df.apply(
            self.rolling_eurodollar_session_corr_pos_ind,
            args=(correl_filter, ),
            axis=1)
        ed_df['os_sl_pos_ind'] = pos_ind_series.apply(
            lambda pos_ind_tuple: pos_ind_tuple[0])
        ed_df['os_snxto_pos_ind'] = pos_ind_series.apply(
            lambda pos_ind_tuple: pos_ind_tuple[1])
        ed_df['ol_lnxto_pos_ind'] = pos_ind_series.apply(
            lambda pos_ind_tuple: pos_ind_tuple[2])
        np_os_sl_pos_ind = ed_df.os_sl_pos_ind.values
        np_os_snxto_pos_ind = ed_df.os_snxto_pos_ind.values
        np_ol_lnxto_pos_ind = ed_df.ol_lnxto_pos_ind.values
        np_array_list = [
            np.repeat(
                pos_ind,
                np.min([
                    rolling_pnl_window_size,
                    len(np_os_sl_pos_ind) - item_idx
                ])) for item_idx, pos_ind in enumerate(np_os_sl_pos_ind)
        ]
        final_np_array_list = [
            np.append(
                np.append(
                    np.repeat(
                        0, np.min([item_idx,
                                   len(np_array_list) - len(npa)])),
                    np.array(npa)),
                np.repeat(
                    0, np.max([len(np_array_list) - (item_idx + len(npa)),
                               0])))
            for item_idx, npa in enumerate(np_array_list)
        ]
        np_os_snxto_array_list = [
            np.repeat(
                pos_ind,
                np.min([
                    rolling_pnl_window_size,
                    len(np_os_snxto_pos_ind) - item_idx
                ])) for item_idx, pos_ind in enumerate(np_os_snxto_pos_ind)
        ]
        final_np_os_snxto_array_list = [
            np.append(
                np.append(
                    np.repeat(
                        0,
                        np.min(
                            [item_idx,
                             len(np_os_snxto_array_list) - len(npa)])),
                    np.array(npa)),
                np.repeat(
                    0,
                    np.max([
                        len(np_os_snxto_array_list) - (item_idx + len(npa)), 0
                    ]))) for item_idx, npa in enumerate(np_os_snxto_array_list)
        ]
        np_ol_lnxto_array_list = [
            np.repeat(
                pos_ind,
                np.min([
                    rolling_pnl_window_size,
                    len(np_ol_lnxto_pos_ind) - item_idx
                ])) for item_idx, pos_ind in enumerate(np_ol_lnxto_pos_ind)
        ]
        final_np_ol_lnxto_array_list = [
            np.append(
                np.append(
                    np.repeat(
                        0,
                        np.min(
                            [item_idx,
                             len(np_ol_lnxto_array_list) - len(npa)])),
                    np.array(npa)),
                np.repeat(
                    0,
                    np.max([
                        len(np_ol_lnxto_array_list) - (item_idx + len(npa)), 0
                    ]))) for item_idx, npa in enumerate(np_ol_lnxto_array_list)
        ]
        self.logger.info(
            "FredAPI:rolling_eurodollar_os_sl_corr(): final_np_array list dimensions are %s",
            np.array(final_np_array_list).shape)
        os_sl_total_pos_ind = np.sum(np.array(final_np_array_list), axis=0)
        os_snxto_total_pos_ind = np.sum(np.array(final_np_os_snxto_array_list),
                                        axis=0)
        ol_lnxto_total_pos_ind = np.sum(np.array(final_np_ol_lnxto_array_list),
                                        axis=0)
        ed_df['os_sl_total_pos_ind'] = pd.Series(os_sl_total_pos_ind,
                                                 index=ed_df.index)
        ed_df['os_snxto_total_pos_ind'] = pd.Series(os_snxto_total_pos_ind,
                                                    index=ed_df.index)
        ed_df['ol_lnxto_total_pos_ind'] = pd.Series(ol_lnxto_total_pos_ind,
                                                    index=ed_df.index)
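        # Final PnL series: per-session trade PnL scaled by the aggregate position,
        # plus a slippage term proportional to the absolute position size
        # (execution_slippage is presumably negative if it represents a cost).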
        ed_df['FinalSettleLastTradeSelect'] = ed_df['SettleLastTradeSelect'].mul(ed_df['os_sl_total_pos_ind']).\
            add(ed_df['os_sl_total_pos_ind'].abs().mul(execution_slippage))
        ed_df['FinalSettleNextOpenTradeSelect'] = ed_df['SettleNextOpenTradeSelect'].\
            mul(ed_df['os_snxto_total_pos_ind']).add(ed_df['os_snxto_total_pos_ind'].abs().mul(execution_slippage))
        ed_df['FinalLastNextOpenTradeSelect'] = ed_df['LastNextOpenTradeSelect'].\
            mul(ed_df['ol_lnxto_total_pos_ind']).add(ed_df['ol_lnxto_total_pos_ind'].abs().mul(execution_slippage))
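        # Quick matplotlib diagnostics: the aggregate position series and the
        # cumulative PnL of each final trade-select series.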
        ed_df.os_sl_total_pos_ind.plot(
            title='Open-Settle/Settle-Last Total Pos Ind')
        plt.show()
        ed_df.os_snxto_total_pos_ind.plot(
            title='Open-Settle/Settle-NextOpen Total Pos Ind')
        plt.show()
        ed_df.ol_lnxto_total_pos_ind.plot(
            title='Open-Last/Last-NextOpen Total Pos Ind')
        plt.show()
        ed_df.FinalSettleLastTradeSelect.cumsum().plot(
            title='Settle-to-Last Cum. PnL')
        plt.show()
        ed_df.FinalSettleNextOpenTradeSelect.cumsum().plot(
            title='Settle-to-NextOpen Cum. PnL')
        plt.show()
        ed_df.FinalLastNextOpenTradeSelect.cumsum().plot(
            title='Last-to-NextOpen Cum. PnL')
        plt.show()
        ed_df.to_csv('/Users/traderghazy/workspace/data/ed_df.csv')
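        # Frame handed to the Bokeh helpers: lagged correlations plus realised and
        # forward-looking rolling PnL columns, with incomplete (NaN) rows dropped.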
        data = ed_df[[
            'os_sl_lagged_corr_series', 'os_snxto_lagged_corr_series',
            'ol_lnxto_lagged_corr_series', 'SettleLastTradeSelect',
            'SettleNextOpenTradeSelect', 'LastNextOpenTradeSelect',
            'rolling_reversion_trade_pnl',
            'fwd_looking_rolling_reversion_trade_pnl',
            'rolling_reversion_settleNextOpen_trade_pnl',
            'fwd_looking_rolling_reversion_settleNextOpen_trade_pnl',
            'rolling_reversion_lastNextOpen_trade_pnl',
            'fwd_looking_rolling_reversion_lastNextOpen_trade_pnl'
        ]].dropna()
        # correl_filter (built above) holds the conditions for filtering on the
        # rolling correlations; the last item of each list must be operator.and_
        # or operator.or_, which tells the filter how to combine the conditions.
        # It is passed through to the Bokeh plotting helpers below.
        p_scat_1, p_scat_2, p_scat_3, p_correl_line = ExtendBokeh.bokeh_ed_ir_rolling_ticks_correl(
            data,
            title=[
                'OS-SL Rolling Cum. Sum vs. Correl',
                'OS-SL Rolling Fwd Cum. Sum vs. Correl',
                'OS-SL Point Value vs. Correl',
                'OS-SL Correlation vs. Datetime'
            ],
            subtitle=['', '', '', ''],
            diff_types_to_correlate='os_sl',
            type_list=[
                'rolling_reversion_trade_pnl',
                'fwd_looking_rolling_reversion_trade_pnl',
                'SettleLastTradeSelect', 'os_sl_lagged_corr_series'
            ],
            rolling_window_size=rolling_window_size,
            correl_filter=correl_filter)
        p_scat_4, p_scat_5, p_scat_6, p_os_snxto_correl_line = ExtendBokeh.\
            bokeh_ed_ir_rolling_ticks_correl(data, title=['OS-SNXTO Rolling Cum. Sum vs. Correl',
                                                          'OS-SNXTO Rolling Fwd Cum. Sum vs. Correl',
                                                          'OS-SNXTO Point Value vs. Correl',
                                                          'OS-SNXTO Correlation vs. Datetime'],
                                             subtitle=['', '', '', ''], diff_types_to_correlate='os_snxto',
                                             type_list=['rolling_reversion_settleNextOpen_trade_pnl',
                                                        'fwd_looking_rolling_reversion_settleNextOpen_trade_pnl',
                                                        'SettleNextOpenTradeSelect',
                                                        'os_snxto_lagged_corr_series'],
                                             rolling_window_size=rolling_window_size, correl_filter=correl_filter)
        p_scat_7, p_scat_8, p_scat_9, p_ol_lnxto_correl_line = ExtendBokeh.\
            bokeh_ed_ir_rolling_ticks_correl(data, title=['OL-LNXTO Rolling Cum. Sum vs. Correl',
                                                          'OL-LNXTO Rolling Fwd Cum. Sum vs. Correl',
                                                          'OL-LNXTO Point Value vs. Correl',
                                                          'OL-LNXTO Correlation vs. Datetime'],
                                             subtitle=['', '', '', ''], diff_types_to_correlate='ol_lnxto',
                                             type_list=['rolling_reversion_lastNextOpen_trade_pnl',
                                                        'fwd_looking_rolling_reversion_lastNextOpen_trade_pnl',
                                                        'LastNextOpenTradeSelect',
                                                        'ol_lnxto_lagged_corr_series'],
                                             rolling_window_size=rolling_window_size, correl_filter=correl_filter)
        the_plots = [
            p_scat_1, p_scat_2, p_scat_3, p_correl_line, p_scat_4, p_scat_5,
            p_scat_6, p_os_snxto_correl_line, p_scat_7, p_scat_8, p_scat_9,
            p_ol_lnxto_correl_line
        ]
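        # Render all twelve panels into a single HTML report named from the
        # ir_class and contract identifiers.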
        html_output_file_path = OSMuxImpl.get_proper_path(
            '/workspace/data/bokeh/html/')
        html_output_file_title = ir_class + '_' + contract + ".scatter.html"
        html_output_file = html_output_file_path + html_output_file_title
        ExtendBokeh.show_hist_plots(the_plots, html_output_file,
                                    html_output_file_title)
        doc = Document()
        # Collect the plots into a single column layout for the returned document
        # (assumes ``column`` from bokeh.layouts is available and that
        # show_hist_plots has not already attached these models to a document).
        doc.add_root(column(*the_plots))
        return doc

    @staticmethod
    def validate_show_document(html_document,
                               html_filename,
                               html_dir,
                               viewHtml=False):
        html_document.validate()
        proper_dir = OSMuxImpl.get_proper_path(html_dir)
        proper_filename = proper_dir + html_filename
        with open(proper_filename, "w", encoding='utf-8') as f:
            f.write(file_html(html_document, INLINE, "Data Tables"))
        LOGGER.info(
            "extend_bokeh_datatables.ExtendBokeh.validate_show_document(): wrote %s in dir %s ",
            html_filename, proper_dir)
        if viewHtml:
            view(proper_filename)


if __name__ == "__main__":
    doc = ExtendDataTable.make_example_datatable()
    doc.validate()
    dir_name = "workspace/data/bokeh/html/"
    filename = OSMuxImpl.get_proper_path(dir_name) + "data_tables.html"
    with open(filename, "w", encoding='utf-8') as f:
        f.write(file_html(doc, INLINE, "Data Tables"))
    print("Wrote %s" % filename)
    view(filename)