def test_indicators_from_json(self):
     # Create new company
     symbol = "ZZZZZ"
     name = "Fake Name"
     d = {'symbol': symbol, "roe": 0.25, "fcf": 100.0, "name": name}
     Indicators.from_json(d)
     c = Company.query.filter_by(symbol=symbol)
     self.assertIsNotNone(c)
     # use existing company
     d = {"roe": 0.25, "fcf": 100.0}
     Indicators.from_json(d)
def get_ratio_data():
    import socket
    import re
    import time
    import dryscrape
    import webkit_server
    from random import randint
    from fake_useragent import UserAgent
    from bs4 import BeautifulSoup
    from selenium.webdriver import PhantomJS


    from app.models import Company, Indicators
    from app.utils import cash_to_float, depercentize


    # Dict item with list: element attribute, attribute value to look for, optional transform function
    indicators = {'roe': {
                          'attribute': 'data-reactid',
                          'value': re.compile(".*RETURN_ON_EQUITY\.1$"),
                          'transform': depercentize,
                          },
                  'fcf': {
                          'attribute': 'data-reactid',
                          'value': re.compile(".*LEVERED_FREE_CASH_FLOW\.1$"),
                          'transform': cash_to_float,
                          },
                  }


    ua = UserAgent()


    #with open("10.csv", "r") as f:
    #with open("sp500-2.csv", "r") as f:
    with open("10_stocks", "r") as f:
        data = f.read()

    symbols = []
    for i in data.split("\n"):
        if i:

            symbols.append(i.split(",")[0])

    print("Iterate through symbols")
    ## dryscrape
    #session = dryscrape.Session()
    #session.set_header('User-Agent', ua.random)
    #session.set_timeout(5)

    for symbol in symbols:
        print("{} Fetching {}  :".format(time.strftime("%H:%M:%S"), symbol))

        import pdb; pdb.set_trace()
        #driver = MyWebDriver()
        driver = PhantomJS()
        driver.set_window_size(1120, 550)
        driver.get("http://finance.yahoo.com/quote/{}/key-statistics?p={}".format(symbol, symbol))

        ##try:
        ##    session = dryscrape.Session()
        ##except socket.error as e:
        ##    print("Failed to configure session {}".format(e))
        ##    continue

        ##session.set_header('User-Agent', ua.random)
        ##session.set_timeout(30)
        #try:
        #    #session.visit("http://finance.yahoo.com/quote/{}/key-statistics?p={}".format(symbol, symbol))
        #except Exception as e:
        #    print e, "try once more......"
        #    session.reset()
        #    time.sleep(5)
        #    session = dryscrape.Session()
        #    #session.set_header('User-Agent', ua.random)
        #    try:
        #        session.set_timeout(5)
        #        session.visit("http://finance.yahoo.com/quote/{}/key-statistics?p={}".format(symbol, symbol))
        #    except Exception as e:
        #        print e, "done trying..."
        #        session.reset()
        #        time.sleep(2)
        #        session = dryscrape.Session()
        #        continue


        #except socket.error as e:
        #    print("Failed to get {}, {} (1)".format(symbol, e))
        #    continue
        #except webkit_server.EndOfStreamError as e:
        #    print("Failed to get {}, {}, breaking (2)".format(symbol, e))
        #    continue
        #except webkit_server.InvalidResponseError as e:
        #    print("Failed to get {}, {}, breaking (3)".format(symbol, e))
        #    continue

        #response = session.body()
        #soup = BeautifulSoup(response, "lxml")

        with open("{}.out".format(symbol), "w") as f:
            f.write(driver.page_source.encode('utf-8'))

        soup = BeautifulSoup(driver.page_source, "lxml")

        d = {'symbol': symbol}
        for indicator in indicators.keys():
            curr_ind = indicators[indicator]
            s = soup.find_all(attrs={curr_ind['attribute']: curr_ind['value']})
            print indicator, s

            for element in s:
                if curr_ind.has_key('transform'):
                    f = curr_ind['transform']
                    #print(f(element.text))
                    d[indicator] = f(element.text)
                else:
                    #print(element.text)
                    d[indicator] = element.text

        try:
            db.session.add(Indicators.from_json(d))
            db.session.commit()
        except (IntegrityError, UnmappedInstanceError) as e:
            print "Caught", e
            db.session.rollback()

        print "indicators", d
def get_ratio_data():
    import re
    import time
    from bs4 import BeautifulSoup

    from app.models import Company, Indicators
    from app.utils import cash_to_float, depercentize


    # Dict item with list: element attribute, attribute value to look for, optional transform function
    indicators = {'roe': {
                          'attribute': 'data-reactid',
                          'value': re.compile(".*RETURN_ON_EQUITY\.1$"),
                          'transform': depercentize,
                          },
                  'fcf': {
                          'attribute': 'data-reactid',
                          'value': re.compile(".*LEVERED_FREE_CASH_FLOW\.1$"),
                          'transform': cash_to_float,
                          },
                  'ev2ebitda': {
                          'attribute': 'data-reactid',
                          'value': re.compile(".*ENTERPRISE_VALUE_TO_EBITDA\.1$"),
                          },
                  }


    companies = Company.query.with_entities(Company.symbol).all()
    symbols = [company[0] for company in companies]

    print("Iterate through symbols")

    for symbol in symbols:
        print("{} Fetching {}  :".format(time.strftime("%H:%M:%S"), symbol))

        #driver = MyWebDriver()
        retry_current = 0
        retry_limit = 5
        while retry_current < retry_limit:
            try:
                driver = PhantomJS()
            except URLError:
                time.sleep(retry_current**2)
            retry_current += 1

        driver.set_window_size(1120, 550)
        driver.get("http://finance.yahoo.com/quote/{}/key-statistics?p={}".format(symbol, symbol))
        try:
            #WebDriverWait(driver, 10).until(EC.title_contains("AAPL Key Statistics | Apple Inc. Stock - Yahoo Finance"))
            #element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "td[reactid]")))
            #element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//td[@data-reactid[ends-with(., 'RETURN_ON_EQUITY.1')]]")))

            # these two seem to work...
            element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//*[substring(@data-reactid, string-length(@data-reactid) - string-length('RETURN_ON_EQUITY.1') +1) = 'RETURN_ON_EQUITY.1']")))
            #element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//*[contains(@data-reactid,'RETURN_ON_EQUITY.1')]")))

            #"//input[@id[ends-with(.,'register')]]"
        except TimeoutException as e:
            print  "Caught", e
            print driver.title
            continue

        #time.sleep(5)

        #with open("{}.out".format(symbol), "w") as f:
        #    f.write(driver.page_source.encode('utf-8'))

        soup = BeautifulSoup(driver.page_source, "lxml")

        d = {'symbol': symbol}
        for indicator in indicators.keys():
            curr_ind = indicators[indicator]
            s = soup.find_all(attrs={curr_ind['attribute']: curr_ind['value']})
            print indicator, s

            for element in s:
                if curr_ind.has_key('transform'):
                    f = curr_ind['transform']
                    #print(f(element.text))
                    d[indicator] = f(element.text)
                else:
                    #print(element.text)
                    d[indicator] = element.text

        try:
            db.session.add(Indicators.from_json(d))
            db.session.commit()
        except (IntegrityError, UnmappedInstanceError) as e:
            print "Caught", e
            db.session.rollback()

        print "indicators", d