def get_stock_news(symbol, query, collection=None):
    # `querys` (news search URL templates) and `url_base` (site root) are
    # module-level constants defined elsewhere in this module.
    for query_format in querys:
        html = safe_request(query_format.format(symbol, query.split('GBGB')[0]))
        soup = BeautifulSoup(html, 'html.parser')
        for news in soup.find_all('li', class_='newsContainer'):
            info = news.find('a')
            title = info.string.strip()
            # The href is a javascript openWin2('...') call; pull out the
            # target page and rebuild a plain URL.
            url = url_base + info['href'].strip().split(
                '.html')[0].strip().split("openWin2('")[-1] + '.html'
            date_str = news.find('span', class_='hour').string.strip()
            # Time-only entries (e.g. '14:30') are today's news; prepend the date.
            if len(date_str) <= 5:
                date_str = timezone.datetime.now().strftime(
                    '%d %b %Y') + ' ' + date_str
            date = timezone.datetime.strptime(date_str, '%d %b %Y %H:%M')
            if collection:
                obj, created = collection.objects.get_or_create(Symbol=symbol)
                try:
                    obj_news, created = obj.stocknews_set.get_or_create(
                        pub_date=date, url=url, title=title)
                    if created:
                        obj_news.save()
                except Exception:
                    # Skip rows that violate model constraints.
                    pass
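
# Minimal usage sketch for get_stock_news. 'VOD' and the query string below
# are illustrative placeholders (assumptions), not values taken from live
# data; with no collection passed, the scrape runs but nothing is persisted.
def example_fetch_news():
    get_stock_news('VOD', 'GB00BH4HKS39GBGBXSET1')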
def getLSEURLSymbol(symbol, collection=None):
    url = ('http://www.londonstockexchange.com/exchange/searchengine/'
           'search.html?q={}&page={}')
    new_query = ""
    # Walk the paginated search results until the symbol is matched, or a
    # page without a results table ends the search.
    for p in itertools.count(start=1):
        html = safe_request(url.format(symbol, p))
        soup = BeautifulSoup(html, 'html.parser')
        table = soup.find('table', class_='table_dati')
        if table and not new_query:
            for tr in table.find('tbody').find_all('tr'):
                sSymbol, sLink, sType = tr.find_all('td')[:3]
                sec_type = sType.string.strip()
                if sec_type != 'Stocks':
                    continue
                # The symbol cell may mix bare text and tags; flatten it.
                check = ''.join(
                    x if isinstance(x, str) else x.string
                    for x in sSymbol.contents).strip()
                if check == symbol.upper():
                    href = sLink.find('a')['href']
                    new_query = href.split('.html')[0].split('/')[-1]
                    if collection:
                        obj = collection.objects.get(Symbol=symbol)
                        obj.Query = new_query
                        obj.save()
                    break
        else:
            break
    return new_query
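
# Usage sketch for getLSEURLSymbol: resolve a ticker to the URL query slug
# used by the detail pages. 'VOD' is an illustrative symbol; without a
# collection the slug is only returned, not cached on a model.
def example_resolve_symbol():
    slug = getLSEURLSymbol('VOD')
    print 'Resolved query slug: {}'.format(slug)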
def get(url, collection=None):
    # `url` must contain a '{}' placeholder for the page number.
    lists = []
    n_pages = 1
    index = 0
    while index < n_pages:
        index += 1
        html = safe_request(url.format(index))
        soup = BeautifulSoup(html, 'html.parser')
        # Read the total page count from the pager on the first page.
        if n_pages == 1:
            n_pages = int(
                soup.find('div', class_='paging').find(
                    'p').string.split('of')[-1].strip())
        print 'Fetching page {} of {}'.format(index, n_pages)
        # Parse the listing table, one stock per row.
        for tr in soup.find('tbody').find_all('tr'):
            tds = tr.find_all('td')
            # Construct data for this row.
            info = {}
            info['symbol'] = tds[0].string.strip()
            a = tds[1].find('a')
            info['name'] = a.string.strip()
            # Strip any query string and the trailing '.html' from the link.
            info['query'] = a.get('href').strip().split(
                '/')[-1].split('?')[0][:-5]
            lists.append(info)
            if collection:
                # Drop duplicate rows for the same symbol, keeping the newest.
                objs = collection.objects.filter(Symbol=info['symbol'])
                if objs.count() > 1:
                    for q in objs.order_by("-pub_date")[1:]:
                        q.delete()
                obj, _ = objs.get_or_create(Symbol=info['symbol'])
                obj.Query = info['query']
                obj.Name = info['name']
                obj.pub_date = timezone.now()
                obj.save()
    return lists
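
# Usage sketch for the paginated listing scraper. The URL below is an assumed
# example of an LSE listing page (not taken from this module) and must keep
# the '{}' placeholder that get() fills in with the page number.
def example_list_stocks():
    listing_url = ('http://www.londonstockexchange.com/exchange/'
                   'prices-and-markets/stocks/indices/'
                   'constituents-indices.html?index=UKX&page={}')
    for info in get(listing_url):
        print '{symbol}: {name} -> {query}'.format(**info)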
def getLSEInfo(query, symbol, collection=None):

    def valid_str(input_str):
        # Keep only alphanumerics so headings become stable dict keys.
        return re.sub('[^0-9a-zA-Z]+', '', input_str)

    def valid_num(input_str):
        # Parse a cell as a float, tolerating currency symbols and commas.
        try:
            input_str = re.sub('[^0-9.-]+', '', input_str)
            if len(input_str) and input_str != '-':
                return float(input_str)
        except Exception:
            pass
        return 0.0

    header = {'User-Agent': 'Mozilla/5.0'}
    info = {}
    # Try the cached query slug first, then re-resolve it from the search
    # page. `query_url` is a module-level URL template defined elsewhere.
    query_candidates = [query, getLSEURLSymbol(symbol, collection=collection)]
    found = False
    for query in query_candidates:
        try:
            html = safe_request(query_url.format(query))
            soup = BeautifulSoup(html, 'html.parser')
            tIncome, tBalance, tRatio, tCompany, tTrading = soup.find_all('table')
            found = True
            break
        except Exception:
            continue
    if not found:
        print 'Data not Available'
        return info

    # Income Table
    detail = {}
    for tr in tIncome.find('tbody').find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) > 1:
            index = valid_str(tds[0].string)
            detail[index] = [valid_num(td.string) for td in tds[1:]]
    info['Income'] = detail

    # Balance Table
    detail = {}
    for tr in tBalance.find('tbody').find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) > 1:
            index = valid_str(tds[0].string)
            detail[index] = [valid_num(td.string) for td in tds[1:]]
    info['Balance'] = detail

    # Ratio Table
    detail = {}
    for tr in tRatio.find('tbody').find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) > 1:
            index = valid_str(tds[0].string)
            detail[index] = [valid_num(td.string) for td in tds[1:]]
    info['Ratio'] = detail

    # Company
    detail = {}
    for tr in tCompany.find('tbody').find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) > 1:
            index = valid_str(tds[0].string)
            detail[index] = (valid_num(tds[-1].string)
                             if 'Marketcap' in index else tds[-1].string)
    info['Company'] = detail

    # Trading
    detail = {}
    for tr in tTrading.find('tbody').find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) > 1:
            index = valid_str(tds[0].string)
            detail[index] = (valid_num(tds[-1].string)
                             if 'Exchange' in index else tds[-1].string)
    info['Trading'] = detail

    # Get Spread
    url = ('http://www.londonstockexchange.com/exchange/prices-and-markets/'
           'stocks/summary/company-summary/{}.html')
    html = urllib2.urlopen(urllib2.Request(url.format(query), headers=header))
    soup = BeautifulSoup(html, 'html.parser')
    try:
        tSummary = soup.find_all('table')[0]
    except IndexError:
        print 'Data not Available'
        return info
    detail = {}
    for tr in tSummary.find('tbody').find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) > 1:
            # Summary rows are label/value pairs laid out side by side.
            for index, value in zip(tds[0::2], tds[1::2]):
                index = valid_str(index.string)
                keep_raw = ('Var' in index or 'Last' in index or
                            'status' in index or 'Special' in index or
                            index == '')
                detail[index] = value.string if keep_raw else valid_num(value.string)
    info['Summary'] = detail

    stats = {}
    # DATA organising --------------------------
    stats['MarketCap'] = info['Company']['Marketcapinmillions']
    stats['Profit'] = info['Income']['ProfitBeforeTax'][-1]
    stats['MPRatio'] = (stats['MarketCap'] / stats['Profit']
                        if stats['Profit'] > 0 else 999)
    stats['PE'] = info['Ratio']['PERatioAdjusted'][-1]
    stats['EMS'] = info['Trading']['Exchangemarketsize']
    offer = info['Summary']['Offer']
    bid = info['Summary']['Bid']
    stats['Spread'] = 100 * (offer - bid) / bid if bid > 0 else 99
    stats['Dividend'] = info['Ratio']['DividendYield'][-1]
    stats['NetDebt'] = info['Balance']['TotalLiabilities'][-1]
    try:
        stats['Price'] = info['Summary']['PriceGBX']
    except KeyError:
        stats['Price'] = 0
        print 'Price is not GBP'
    stats['Bid'] = info['Summary']['Bid']
    stats['Offer'] = info['Summary']['Offer']
    stats['Liquidity'] = stats['EMS'] * stats['Price'] / 100.0
    stats['DPRatio'] = (stats['NetDebt'] / stats['Profit'] * -1.0
                        if stats['Profit'] != 0 else 3)
    # -------------------------------------------
    info['stats'] = stats
    if collection:
        obj, created = collection.objects.get_or_create(Symbol=symbol)
        obj.MarketCap = stats['MarketCap']
        obj.Profit = stats['Profit']
        obj.MPRatio = stats['MPRatio']
        obj.PE = stats['PE']
        obj.EMS = stats['EMS']
        obj.Bid = stats['Bid']
        obj.Offer = stats['Offer']
        obj.Spread = stats['Spread']
        obj.Price = stats['Price']
        obj.Dividend = stats['Dividend']
        obj.NetDebt = stats['NetDebt']
        obj.Liquidity = stats['Liquidity']
        obj.DPRatio = stats['DPRatio']
        obj.Sector = info['Trading']['FTSEsector']
        obj.Catagory = info['Trading']['FTSEindex']  # field name as defined on the model
        obj.ProfitTrend = get_slope(info['Income']['ProfitBeforeTax'])
        obj.DividendTrend = get_slope(info['Ratio']['DividendYield'])
        obj.DebtTrend = get_slope(info['Balance']['TotalLiabilities'])
        obj.pub_date = timezone.now()
        obj.save()
    return info
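
# End-to-end usage sketch (no persistence): resolve the query slug, scrape
# the fundamentals tables, and print the derived stats. 'VOD' is illustrative.
def example_stock_report(symbol='VOD'):
    query = getLSEURLSymbol(symbol)
    info = getLSEInfo(query, symbol)
    for key, value in sorted(info.get('stats', {}).items()):
        print '{}: {}'.format(key, value)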