Ejemplo n.º 1
0
def get_html(urlQ, callback, xpath_hooks):
    """
    Take urls from the URL Queue (urlQ) until it reports empty, load each
    one in a headless WebKit session and call a callback that will handle
    the page source.

    For every page that loads, ``callback(session, url, response)`` is
    called, where ``session`` is a single ``Session()`` created for this
    call, ``url`` is the url taken from the queue and ``response`` is the
    page body.

    xpath_hooks is a list used to determine when the page is loaded: the
    page is considered ready as soon as any of the xpaths matches, see the
    docs for more details (e.g. ["//div[@data-test='whatever']"] ).

    Urls whose visit raises ``InvalidResponseError``, or whose hooks do not
    match within the 15 second timeout, are logged and skipped.

    The WebKit server is killed and the session closed when the function
    exits, including when the callback or the driver raises.
    """
    svr = webkit_server.Server()
    try:
        svrconn = webkit_server.ServerConnection(server=svr)
        driver = dryscrape.driver.webkit.Driver(connection=svrconn)

        sess = dryscrape.Session(driver=driver)
        sess.set_header(
            "User-Agent",
            "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36"
        )
        # Images are not needed to read the page source; skip loading them.
        sess.set_attribute("auto_load_images", False)

        def valid_page_func():
            # True once at least one of the hook xpaths matches the page.
            return any(sess.at_xpath(xpath) for xpath in xpath_hooks)

        session = Session()
        try:
            while not urlQ.empty():
                url = urlQ.get()

                try:
                    sess.visit(url)
                except webkit_server.InvalidResponseError:
                    LOGGER.error(
                        "Got invalid response from something? Skipping {}".format(url))
                    continue

                try:
                    # Poll once per second until a hook matches or 15s pass.
                    sess.wait_for(valid_page_func, interval=1, timeout=15)
                except dryscrape.mixins.WaitTimeoutError:
                    LOGGER.error("Timeout so skipping {}".format(url))
                    continue

                response = sess.body()
                callback(session, url, response)
                sess.reset()
        finally:
            session.close()
    finally:
        # Always stop the WebKit server so it is not left running on error.
        svr.kill()
Ejemplo n.º 2
0
    def __init__(self):
        """
        Set up the flag-to-column mapping, parsing patterns, request
        settings and session used by this scraper.

        Attributes set here:
          y_to_db_map     -- quote flag (e.g. 'n') -> column name (e.g. 'name')
          url_flags       -- tuple of the flags, in mapping order
          url_str_flags   -- the flags concatenated into one string
          db_entries      -- tuple of the column names, in mapping order
          convert_dict    -- magnitude suffix -> multiplier (K, M, B, T)
          condensed_pat   -- matches a number followed by a k/m/b/t suffix
          float_pat       -- matches an optionally signed int/decimal at end
          today           -- today's date
          base_url        -- quotes CSV endpoint
          headers         -- request headers (iPhone User-Agent)
          session         -- a new Session()
        """
        # This is a reverse engineering of the Yahoo Finance REST API
        # Flag reference: http://www.jarloo.com/yahoo_finance/
        self.y_to_db_map = {
            'n': 'name', 'y': 'dividend_yield', 'd': 'dividend_ps',
            'r': 'pe', 'r1': 'dividend_pay_date', 'q': 'ex_dividend_date',
            'o': 'open', 'c1': 'change', 'p2': 'perc_change', 'd1': 'last_trade_date',
            'd2': 'trade_date', 'c3': 'commission', 'g': 'day_low', 'h': 'day_high',
            'p': 'previous_close', 't8': 'year_target', 'm5': 'change_mv_avg_200',
            'm6': 'perc_change_mv_avg_200', 'm7': 'change_mv_avg_50', 'm8': 'perc_change_mv_avg_50',
            'm3': 'mv_avg_50', 'm4': 'mv_avg_200', 'w1': 'day_value_change',
            'g1': 'holding_gain_perc', 'g3': 'annualized_gain', 'g4': 'holdings_gain',
            'k': 'high_52_week', 'j': 'low_52_week', 'j5': 'change_52_week_low',
            'k4': 'change_52_week_high', 'j6': 'perc_change_52_week_low',
            'k5': 'perc_change_52_week_high', 'j1': 'market_cap',
            'f6': 'float_shares', 'x': 'stock_exchange', 's1': 'shares_owned',
            'j2': 'shares_outstanding', 'n4': 'notes', 'i': 'more_info',
            'v': 'volume', 'a2': 'avg_daily_volume', 'e': 'eps', 'e7': 'eps_year_estimate',
            'e8': 'eps_next_year_estimate', 'e9': 'eps_next_q_estimate', 'b4': 'book',
            'j4': 'ebitda', 'p5': 'price_sale', 'p6': 'price_book', 'r5': 'peg',
            'r6': 'price_eps_estimate_year', 'r7': 'price_eps_estimate_next_year', 's7': 'short_ratio',
            's6': 'revenue', 'v1': 'holdings_val', 'l2': 'high_limit', 'l3': 'low_limit',
            'a': 'ask', 'b': 'bid',
        }
        # NOTE(review): condensed_pat also accepts lowercase suffixes while
        # these keys are uppercase only -- confirm callers upper-case first.
        self.convert_dict = {'K': 10 ** 3, 'M': 10 ** 6, 'B': 10 ** 9, 'T': 10 ** 12}
        # Raw strings so that \d and \. reach the regex engine untouched.
        self.condensed_pat = re.compile(r"([+-]?\d*\.?\d+)([kmbtKMBT])")
        self.url_flags = tuple(self.y_to_db_map.keys())
        self.url_str_flags = "".join(self.url_flags)
        self.db_entries = tuple(self.y_to_db_map.values())
        self.float_pat = re.compile(r"[+-]?(\d*[\.])?\d+$")
        self.today = datetime.today().date()
        self.base_url = "http://finance.yahoo.com/d/quotes.csv"

        # Sometimes websites are friendlier to iOS devices :)
        self.headers = {
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25"
        }
        self.session = Session()
Ejemplo n.º 3
0
    def __init__(self):
        """
        Create the session and the lookup tables used by this scraper.

        The *_map attributes are ordered tuples of (lowercase label text,
        column name) pairs, and the *_titles attributes are ordered tuples
        of (section title, short prefix) pairs. The prefixes ("pro ",
        "cf ", "fh ", ...) are the same ones that start many of the
        column_key_map labels. Order is preserved exactly as declared --
        presumably lookups take the first match, so more specific labels
        are listed before the generic ones (verify against the callers).
        """
        self.session = Session()
        self.today = datetime.today().date()
        # Called after self.today is set, in case the helper relies on it.
        self.ttm_string = self.most_recent_quarter()
        self.headers = {
            'User-Agent':
            "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36"
        }
        self.exchange_map = {"XTSE": "TSX"}

        self.year_month_cols = {
            "fiscal_year",
            "margin_date",
            "profitability_date",
        }

        self.column_key_map = (
            ("revenue", "revenue"),
            ("gross margin", "gross_margin"),
            ("operating income", "operating_income"),
            ("operating margin", "operating_margin"),
            ("net income", "net_income"),
            ("earnings per share", "eps"),
            ("dividends", "dividends"),
            ("payout ratio", "payout_ratio"),
            ("shares", "num_shares"),
            ("book value per", "book_value_ps"),
            ("operating cash flow", "operating_cash_flow"),
            ("cap spending", "cap_spending"),
            ("cf free cash flow growth", "free_cash_flow_growth_yoy"),
            ("cf free cash flow/sales", "free_cash_flow_sales"),
            ("cf free cash flow/net", "free_cash_flow_net_income"),
            ("free cash flow per share", "free_cash_flow_ps"),
            ("free cash flow", "free_cash_flow"),
            ("working capital", "working_captial"),
            ("pro margins %", "margin_date"),
            ("pro revenue", "revenue_per_sales"),
            ("pro cogs", "revenue_per_cogs"),
            ("pro gross margin", "sales_gross_margin"),
            ("pro sg&a", "margin_sga"),
            ("pro r&d", "margin_rd"),
            ("pro other", "margin_other"),
            ("pro operating margin", "margin_operating"),
            ("pro net int inc", "margin_net_income"),
            ("pro ebt margin", "margin_ebt"),
            ("pro profitability", "profitability_date"),
            ("pro tax rate", "tax_rate"),
            ("pro net margin", "net_margin_perc"),
            ("pro asset turnover", "asset_turnover"),
            ("pro return on assets", "ro_assets"),
            ("pro financial lever", "financial_leverage"),
            ("pro return on equity", "ro_equity"),
            ("pro return on invested capital", "ro_invested_captial"),
            ("pro interest coverage", "interest_coverage"),
            ("r% year over year", "revenue_perc_yoy"),
            ("r% 3-year", "revenue_perc_3y"),
            ("r% 5-year", "revenue_perc_5y"),
            ("r% 10-year", "revenue_perc_10y"),
            ("oi% year over year", "operating_income_yoy"),
            ("oi% 3-year", "operating_income_3y"),
            ("oi% 5-year", "operating_income_5y"),
            ("oi% 10-year", "operating_income_10y"),
            ("ni% year over year", "net_income_yoy"),
            ("ni% 3-year", "net_income_3y"),
            ("ni% 5-year", "net_income_5y"),
            ("ni% 10-year", "net_income_10y"),
            ("eps% year over year", "eps_yoy"),
            ("eps% 3-year", "eps_3y"),
            ("eps% 5-year", "eps_5y"),
            ("eps% 10-year", "eps_10y"),
            ("cf operating cash flow", "cash_flow_operating_growth_yoy"),
            ("cf cap ex", "cap_expense_perc_sales"),
            ("fh cash & short", "cash_short_term"),
            ("fh accounts receivable", "accounts_receivable"),
            ("fh inventory", "inventory"),
            ("fh other current assets", "other_cur_assets"),
            ("fh total current assets", "total_cur_assets"),
            ("fh net pp&e", "net_ppe"),
            ("fh intangibles", "intangibles"),
            ("fh other long-term assets", "other_long_term_assets"),
            ("fh accounts payable", "accounts_payable"),
            ("fh short-term debt", "short_term_debt"),
            ("fh taxes payable", "taxes_payable"),
            ("fh accured liabilities", "accured_liabilities"),
            ("fh other short-term liabilities", "short_term_liabilities"),
            ("fh long-term debt", "long_term_debt"),
            ("fh total liabilities & equity", "total_liabilities_equity"),
            ("fh total liabilities", "total_liabilities"),
            ("fh total stockholder", "total_stockholder"),
            ("fh current ratio", "current_ratio"),
            ("fh quick ratio", "quick_ratio"),
            ("fh debt/equity", "debt_equity"),
            ("er receivables turnover", "receivables_turnover"),
            ("er inventory turnover", "inventory_turnover"),
            ("er fixed assets turnover", "fixed_assets_turnover"),
        )
        self.column_financials_map = (
            ("fiscal year", "fiscal_year"),
            ("revenue", "revenue"),
            ("cost of revenue", "revenue_cost"),
            ("gross profit", "gross_profit"),
            ("sales, general and administrative", "sales_expense"),
            ("other operating", "operating_expense"),
            ("other assets", "other_assets"),
            ("operating income", "operating_income"),
            ("interest expense", "intrest_expense"),
            ("total operating expense", "total_costs"),
            ("total costs and expenses", "total_costs"),
            ("preferred dividend", "preferred_dividend"),
            ("income before", "income_before_taxes"),
            ("provision for", "provision_taxes"),
            ("net income from continuing op", "net_income_continuing_ops"),
            ("net income from discontinuing ops",
             "net_income_discontinuing_ops"),
            ("net income available to common shareholders",
             "net_income_common"),
            ("net income", "net_income"),
            ("eps basic", "eps_basic"),
            ("eps diluted", "eps_diluted"),
            ("waso basic", "waso_basic"),
            ("waso diluted", "waso_diluted"),
            ("ebitda", "ebitda"),
        )

        self.special_key_titles = (
            ("key ratios -> profitability", "pro "),
            ("key ratios -> growth", "gro "),
            ("key ratios -> cash flow", "cf "),
            ("key ratios -> financial health", "fh "),
            ("key ratios -> efficiency ratios", "er "),
            ("revenue %", "r% "),
            ("operating income %", "oi% "),
            ("net income %", "ni% "),
            ("eps %", "eps% "),
        )
        self.special_financials_titles = (
            ("earnings per share", "eps "),
            ("weighted average shares outstanding", "waso "),
        )

        # Code points of '"' and ',' mapped to None (the form str.translate
        # uses to delete characters).
        self.translation_table = {ord(char): None for char in '",'}
Ejemplo n.º 4
0
        imp = imp.fit(train_data)

        train_ticker_names = np.array(train_ticker_names, dtype=np.str)
        train_data = imp.transform(train_data)
        train_targets = np.array(train_targets, dtype=np.float)
        test_ticker_names = np.array(test_ticker_names, dtype=np.str)
        test_data = imp.transform(np.array(test_data, dtype=np.float))
        test_targets = np.array(test_targets, dtype=np.float)

        if not os.path.exists(self.dir_path):
            os.makedirs(self.dir_path)

        LOGGER.info("Saving file at: {}".format(self.file_path))

        np.savez(self.file_path,
                 train_data=train_data,
                 train_targets=train_targets,
                 train_ticker_names=train_ticker_names,
                 test_data=test_data,
                 test_targets=test_targets,
                 test_ticker_names=test_ticker_names)


if __name__ == "__main__":
    from sa.database import Session

    # Generate and then screen the feature data using one database session.
    # The session is closed on exit, including when a step raises.
    sess = Session()
    try:
        fc = FeatureHelper(sess)
        fc.generate_and_save_feature_data(independent=False)
        fc.screen_and_save_feature_data()
    finally:
        sess.close()
Ejemplo n.º 5
0
 def __init__(self):
     """Create a new ``Session()`` and keep it on ``self.sess``."""
     new_session = Session()
     self.sess = new_session
Ejemplo n.º 6
0
 def __init__(self, url="http://www.tsx.com/resource/en/571"):
     """
     Store the resource url, today's date and a new ``Session()``.

     url -- address of the resource to fetch; defaults to a tsx.com
            resource page.
     """
     self.url = url
     current_moment = datetime.today()
     self.today = current_moment.date()
     self.session = Session()
Ejemplo n.º 7
0
 def __init__(self):
     """
     Set up the label mapping, parsing patterns, dates and session used by
     this scraper.

     Attributes set here:
       y_to_db_map     -- statistic label (e.g. 'Forward P/E') -> column
                          name (e.g. 'forward_pe'); presumably the labels
                          shown on a Yahoo key-statistics page (the url
                          pattern below matches ".../key-statistics").
       convert_dict    -- magnitude suffix -> multiplier (K, M, B, T)
       condensed_pat   -- number followed by a trailing k/m/b/t suffix
       float_pat       -- optionally signed decimal number at end of text
       parenthese_pat  -- a parenthesised group plus any spaces before it
       date_line_pat   -- captures the text inside "(as of ...)"
       url_ticker_pat  -- captures the two dot-separated parts between
                          "/quote/" and "/key-statistics" in a url
       keywords        -- set of period abbreviations (mrq, ttm, ...)
       today           -- today's date
       default_fye     -- December 31 of the current year (a datetime)
       session         -- a new Session()
     """
     self.y_to_db_map = {
         'Forward P/E': 'forward_pe',
         'Return on Equity': 'ro_equity',
         'Current Ratio': 'current_ratio',
         'Total Debt': 'total_debt',
         'Forward Annual Dividend Rate': 'forward_annual_dividend_rate',
         'Last Split Date': 'last_split_date',
         'Market Cap (intraday)': 'market_cap',
         'EBITDA': 'ebitda',
         'Shares Short': 'shares_short',
         '50-Day Moving Average': 'fifty_day_moving_avg',
         '52 Week High': 'fifty_two_week_high',
         'Quarterly Earnings Growth': 'q_earnings_growth',
         'Forward Annual Dividend Yield': 'forward_annual_dividend_yield',
         'Beta': 'beta',
         'Payout Ratio': 'payout_ratio',
         'Avg Vol (3 month)': 'avg_vol_3_month',
         'Enterprise Value': 'enterprise_value',
         '5 Year Average Dividend Yield': 'five_year_avg_dividend_yield',
         'Enterprise Value/Revenue': 'enterprise_value_revenue',
         'Trailing P/E': 'trailing_pe',
         'Total Cash': 'total_cash',
         'Operating Cash Flow': 'operating_cash_flow',
         'Price/Book': 'price_book',
         'Fiscal Year Ends': 'fiscal_year_ends',
         'Total Debt/Equity': 'total_debt_equity',
         'Dividend Date': 'dividend_date',
         'Most Recent Quarter': 'most_recent_q',
         'Operating Margin': 'operating_margin',
         'Ex-Dividend Date': 'exdividend_date',
         '% Held by Institutions': 'perc_held_by_institutions',
         'Trailing Annual Dividend Yield': 'trailing_annual_dividend_yield',
         '200-Day Moving Average': 'two_hundred_day_moving_avg',
         '52 Week Low': 'fifty_two_week_low',
         'Avg Vol (10 day)': 'avg_vol_10_day',
         'Last Split Factor (new per old)': 'last_split_factor',
         '% Held by Insiders': 'perc_held_by_insiders',
         'Revenue Per Share': 'revenue_per_share',
         'Short Ratio': 'short_ratio',
         'Shares Short (prior month)': 'shares_short_prior_month',
         'Short % of Float': 'short_perc_float',
         'Profit Margin': 'profit_margin',
         'Return on Assets': 'ro_assets',
         'Price/Sales': 'price_sales',
         'Gross Profit': 'gross_profit',
         'Book Value Per Share': 'book_value_per_share',
         'Levered Free Cash Flow': 'levered_free_cash_flow',
         'Trailing Annual Dividend Rate': 'trailing_annual_dividend_rate',
         'Diluted EPS': 'diluted_eps',
         'PEG Ratio (5 yr expected)': 'peg_ratio_5yr',
         'Shares Outstanding': 'shares_outstanding',
         'Revenue': 'revenue',
         'Float': 'float',
         'Net Income Avi to Common': 'net_income_avi_common',
         'Enterprise Value/EBITDA': 'enterprise_value_ebitda',
         '52-Week Change': 'fifty_two_week_change',
         'Quarterly Revenue Growth': 'q_revenue_growth',
         'Total Cash Per Share': 'total_cash_ps'
     }
     self.convert_dict = {'K': 10**3, 'M': 10**6, 'B': 10**9, 'T': 10**12}
     # Raw strings so that \d, \. and \( reach the regex engine untouched
     # instead of being treated as (invalid) string escape sequences.
     self.condensed_pat = re.compile(r"([+-]?\d*[\.]?\d+)([kmbtKMBT])$")
     self.float_pat = re.compile(r"[+-]?\d*[\.]\d+$")
     self.parenthese_pat = re.compile(r" *\(([^)]*)\)")
     self.date_line_pat = re.compile(r"\(as of (\d+.*\d+)\)")
     self.url_ticker_pat = re.compile(r".*/quote/(.*)\.(.*)/key-statistics")
     self.keywords = {"mrq", "ttm", "yoy", "lfy", "fye"}
     self.today = datetime.today().date()
     self.default_fye = datetime(self.today.year, 12, 31)
     self.session = Session()