def get_stock_news(symbol, query, collection=None):
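    """Scrape news headlines for `symbol` and optionally persist them.

    Relies on module-level `querys` (a list of URL templates), `url_base`
    and the `safe_request` HTTP helper, which are assumed to be defined
    elsewhere in the module.
    """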
    for query_format in querys:
        html = safe_request(query_format.format(symbol,
                                                query.split('GBGB')[0]))
        soup = BeautifulSoup(html, 'html.parser')

        for news in soup.find_all('li', class_='newsContainer'):
            info = news.find('a')
            title = info.string.strip()
            url = url_base + info['href'].strip().split(
                '.html')[0].strip().split("openWin2('")[-1] + '.html'
            date_str = news.find('span', class_='hour').string.strip()
            if len(date_str) <= 5:
                date_str = timezone.datetime.now().strftime(
                    '%d %b %Y') + ' ' + date_str
            date = timezone.datetime.strptime(date_str, '%d %b %Y %H:%M')

            if collection:
                obj, _ = collection.objects.get_or_create(Symbol=symbol)
                try:
                    # get_or_create persists the new row itself, so no extra save()
                    obj.stocknews_set.get_or_create(
                        pub_date=date, url=url, title=title)
                except Exception:
                    pass
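
# Hypothetical usage, assuming a Django model `Stock` whose instances expose
# a `stocknews_set` related manager (as the code above implies), and a query
# string containing the 'GBGB' marker:
#
#     get_stock_news('VOD', 'VODGBGBXSET1', collection=Stock)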


def getLSEURLSymbol(symbol, collection=None):
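    """Search the LSE site for `symbol` and return its URL query slug.

    Pages through the search results until a 'Stocks' row whose symbol cell
    matches `symbol` is found; optionally stores the slug on the matching
    `collection` row.
    """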
    url = 'http://www.londonstockexchange.com/exchange/searchengine/search.html?q={}&page={}'

    new_query = ""
    for p in itertools.count(start=1):
        html = safe_request(url.format(symbol, p))
        soup = BeautifulSoup(html, 'html.parser')
        table = soup.find('table', class_='table_dati')
        if table and not new_query:
            for tr in table.find('tbody').find_all('tr'):
                sSymbol, sLink, sType = tr.find_all('td')[:3]
                row_type = sType.string.strip()
                if row_type != 'Stocks':
                    continue
                # get_text() flattens the mixed text/tag contents of the symbol cell
                check = sSymbol.get_text().strip()
                if check == symbol.upper():
                    href = sLink.find('a')['href']
                    new_query = href.split('.html')[0].split('/')[-1]
                    if collection:
                        obj = collection.objects.get(Symbol=symbol)
                        obj.Query = new_query
                        obj.save()
                    break

        else:
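            # either no results table on this page, or the slug was already
            # found on an earlier page: stop paging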
            break

    return new_query
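
# Hypothetical usage:
#
#     slug = getLSEURLSymbol('VOD', collection=Stock)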


def get(url, collection=None):
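    """Page through a stock listing at `url` (one '{}' page-number slot).

    Returns a list of {'symbol', 'name', 'query'} dicts and optionally
    refreshes the matching `collection` rows. In the original snippet this
    was a nested function with `collection` as a free variable; it is bound
    as a parameter here so the function stands alone.
    """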
    lists = []
    n_pages = 1
    index = 0

    while index < n_pages:
        index += 1
        html = safe_request(url.format(index))
        soup = BeautifulSoup(html, 'html.parser')

        # read the total page count from the pager on the first pass
        if n_pages == 1:
            n_pages = int(
                soup.find('div', class_='paging').find('p')
                .string.split('of')[-1].strip()
            )

        print('Fetching page {} of {}'.format(index, n_pages))

        # parse the listing rows
        for tr in soup.find('tbody').find_all('tr'):
            tds = tr.find_all('td')
            # construct data
            info = {}
            info['symbol'] = tds[0].string.strip()
            a = tds[1].find('a')
            info['name'] = a.string.strip()
            # last URL segment, minus the query string and the '.html' suffix
            info['query'] = a.get('href').strip().split('/')[-1].split('?')[0][:-5]

            lists.append(info)

            if collection:
                # remove duplicate rows for this symbol, keeping the newest
                objs = collection.objects.filter(Symbol=info['symbol'])
                if objs.count() > 1:
                    for q in objs.order_by("-pub_date")[1:]:
                        q.delete()
                obj, _ = objs.get_or_create(Symbol=info['symbol'])
                obj.Query = info['query']
                obj.Name = info['name']
                obj.pub_date = timezone.now()
                obj.save()

    return lists
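
# Hypothetical usage; the listing URL needs a '{}' placeholder for the
# page number:
#
#     stocks = get('http://www.londonstockexchange.com/...?page={}')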


def getLSEInfo(query, symbol, collection=None):
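    """Scrape fundamentals for one company from the LSE financials pages.

    Returns a dict with the Income/Balance/Ratio/Company/Trading/Summary
    tables plus derived screening stats. Relies on the module-level
    `query_url` template and `safe_request` helper, assumed to be defined
    elsewhere.
    """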

    def valid_str(input_str):
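        """Strip everything but letters and digits, e.g. 'P/E Ratio' -> 'PERatio'."""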
        input_str = re.sub('[^0-9a-zA-Z]+', '', input_str)
        return input_str

    def valid_num(input_str):
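        """Extract a float from noisy cell text; 0.0 when missing or not numeric."""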
        try:
            input_str = re.sub('[^0-9.-]+', '', input_str)

            if len(input_str) and input_str != '-':
                return float(input_str)
        except Exception:
            pass
        return 0.0

    info = {}

    queries = [
        query,
        getLSEURLSymbol(symbol, collection=collection),
    ]

    found = False
    for query in queries:
        try:
            html = safe_request(query_url.format(query))
            soup = BeautifulSoup(html, 'html.parser')
            tIncome, tBalance, tRatio, tCompany, tTrading = soup.find_all('table')
            found = True
            break
        except Exception:
            continue

    if not found:
        print('Data not Available')
        return info


    # Income, Balance and Ratio tables share the same layout: a label cell
    # followed by one numeric cell per reporting period.
    for name, table in (('Income', tIncome), ('Balance', tBalance), ('Ratio', tRatio)):
        detail = {}
        for tr in table.find('tbody').find_all('tr'):
            tds = tr.find_all('td')
            if len(tds) > 1:
                index = valid_str(tds[0].string)
                detail[index] = [valid_num(td.string) for td in tds[1:]]
        info[name] = detail

    # Company
    detail = {}
    for tr in tCompany.find('tbody').find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) > 1:
            index = valid_str(tds[0].string)
            if 'Marketcap' in index:
                detail[index] = valid_num(tds[-1].string)
            else:
                detail[index] = tds[-1].string
    info['Company'] = detail

    # Trading
    detail = {}
    for tr in tTrading.find('tbody').find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) > 1:
            index = valid_str(tds[0].string)
            if 'Exchange' in index:
                detail[index] = valid_num(tds[-1].string)
            else:
                detail[index] = tds[-1].string
    info['Trading'] = detail

    # Get Spread
    url = 'http://www.londonstockexchange.com/exchange/prices-and-markets/stocks/summary/company-summary/{}.html'
    # safe_request is the module's HTTP helper used throughout; it replaces
    # the old Python 2 urllib2 call here (assumed to send a browser User-Agent).
    html = safe_request(url.format(query))
    soup = BeautifulSoup(html, 'html.parser')
    try:
        tSummary = soup.find_all('table')[0]
    except Exception:
        print('Data not Available')
        return info

    detail = {}
    for tr in tSummary.find('tbody').find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) > 1:
            # rows hold alternating label/value cell pairs
            for index, value in zip(tds[0::2], tds[1::2]):
                index = valid_str(index.string)
                # keep free-text fields verbatim; parse the rest as numbers
                if index == '' or any(key in index for key in ('Var', 'Last', 'status', 'Special')):
                    detail[index] = value.string
                else:
                    detail[index] = valid_num(value.string)
    info['Summary'] = detail
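
    # Derived screening metrics computed below:
    #   MPRatio   = MarketCap / Profit          (999 when Profit <= 0)
    #   Spread    = 100 * (Offer - Bid) / Bid   (99 when Bid <= 0)
    #   Liquidity = EMS * Price / 100
    #   DPRatio   = -NetDebt / Profit           (3 when Profit == 0)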

    stats = {}
    # Data organising --------------------------
    stats['MarketCap'] = info['Company']['Marketcapinmillions']
    stats['Profit'] = info['Income']['ProfitBeforeTax'][-1]
    stats['MPRatio'] = stats['MarketCap'] / stats['Profit'] if stats['Profit'] > 0 else 999
    stats['PE'] = info['Ratio']['PERatioAdjusted'][-1]
    stats['EMS'] = info['Trading']['Exchangemarketsize']
    offer = info['Summary']['Offer']
    bid = info['Summary']['Bid']
    stats['Spread'] = 100 * (offer - bid) / bid if bid > 0 else 99
    stats['Dividend'] = info['Ratio']['DividendYield'][-1]
    stats['NetDebt'] = info['Balance']['TotalLiabilities'][-1]
    try:
        stats['Price'] = info['Summary']['PriceGBX']
    except KeyError:
        stats['Price'] = 0
        print('Price is not GBP')
    stats['Bid'] = info['Summary']['Bid']
    stats['Offer'] = info['Summary']['Offer']
    stats['Liquidity'] = stats['EMS'] * stats['Price'] / 100.0
    stats['DPRatio'] = stats['NetDebt'] / stats['Profit'] * -1.0 if stats['Profit'] != 0 else 3
    # -------------------------------------------
    info['stats'] = stats

    if collection:
        obj, created = collection.objects.get_or_create(Symbol=symbol)
        obj.MarketCap = stats['MarketCap']
        obj.Profit = stats['Profit']
        obj.MPRatio = stats['MPRatio']
        obj.PE = stats['PE']
        obj.EMS = stats['EMS']
        obj.Bid = stats['Bid']
        obj.Offer = stats['Offer']
        obj.Spread = stats['Spread']
        obj.Price = stats['Price']
        obj.Dividend = stats['Dividend']
        obj.NetDebt = stats['NetDebt']
        obj.Liquidity = stats['Liquidity']
        obj.DPRatio = stats['DPRatio']
        obj.Sector = info['Trading']['FTSEsector']
        obj.Catagory = info['Trading']['FTSEindex']
        obj.ProfitTrend = get_slope(info['Income']['ProfitBeforeTax'])
        obj.DividendTrend = get_slope(info['Ratio']['DividendYield'])
        obj.DebtTrend = get_slope(info['Balance']['TotalLiabilities'])
        obj.pub_date = timezone.now()
        obj.save()

    return info
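
# Hypothetical usage, chaining the helpers above:
#
#     slug = getLSEURLSymbol('VOD', collection=Stock)
#     info = getLSEInfo(slug, 'VOD', collection=Stock)
#     print(info['stats']['Spread'])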