def collect_notice(stock_number, stock_name):
    """Scrape the announcement list for one stock and persist unseen notices.

    Fetches the announcement list page, and for every entry not already in
    the database downloads the notice body, stores it as a StockNotice, and
    sleeps a random sub-second interval between detail requests to throttle
    the crawler.
    """
    list_html = send_request(company_accouncement.format(stock_number))
    list_soup = BeautifulSoup(list_html, "lxml")
    for item in list_soup.find("div", class_="cont").find_all("li"):
        title = item.find("span", class_="title").text
        cate = item.find("span", class_="cate").text
        date_text = item.find("span", class_="date").text
        date = datetime.datetime.strptime(date_text, "%Y-%m-%d")
        url = eastmoney_data + item.find("a").get("href")
        # Skip notices already stored; only new ones trigger a detail fetch.
        if check_duplicate(title, cate, date):
            continue
        detail_soup = BeautifulSoup(send_request(url), "lxml")
        content = detail_soup.find("pre").text
        StockNotice(
            stock_number=stock_number,
            stock_name=stock_name,
            notice_title=title,
            notice_cate=cate,
            notice_date=date,
            notice_url=url,
            notice_content=content,
        ).save()
        time.sleep(random.random())
def collect_notice(stock_info):
    """Scrape the announcement list for *stock_info* and persist unseen notices.

    For every announcement not already recorded, the notice body is fetched,
    saved as a StockNotice document, and a random sub-second pause is taken
    between detail requests to throttle the crawler.
    """
    page = send_request(company_accouncement.format(stock_info.stock_number))
    soup = BeautifulSoup(page, 'lxml')
    entries = soup.find('div', class_='cont').find_all('li')
    for entry in entries:
        title = entry.find('span', class_='title').text
        cate = entry.find('span', class_='cate').text
        date_text = entry.find('span', class_='date').text
        date = datetime.datetime.strptime(date_text, '%Y-%m-%d')
        url = eastmoney_data + entry.find('a').get('href')
        # Only notices we have not stored yet are fetched and saved.
        if check_duplicate(title, cate, date):
            continue
        body_soup = BeautifulSoup(send_request(url), 'lxml')
        body = body_soup.find('pre').text
        StockNotice(
            stock_number=stock_info.stock_number,
            stock_name=stock_info.stock_name,
            notice_title=title,
            notice_cate=cate,
            notice_date=date,
            notice_url=url,
            notice_content=body,
        ).save()
        time.sleep(random.random())
def collect_notice(stock_info):
    """Fetch the JSONP notice feed for *stock_info* and persist unseen notices.

    The endpoint returns a JavaScript assignment of the form
    ``var xxx = {...};``.  The JSON object is extracted by slicing between
    the first '{' and the last '}' rather than blanket-replacing
    'var'/'='/';' across the whole response, which would also corrupt any
    notice title containing those substrings and break ``json.loads``.
    """
    raw = send_request(company_notice.format(stock_info.stock_number))
    start = raw.find('{')
    end = raw.rfind('}')
    # No JSON object in the response (error page, empty body): nothing to do.
    if start == -1 or end == -1:
        return
    notice_data = json.loads(raw[start:end + 1]).get('data', [])
    for n in notice_data:
        notice_title = n.get('NOTICETITLE')
        notice_code = n.get('INFOCODE')
        # NOTICEDATE looks like '2017-01-01T00:00:00'; keep the date part only.
        notice_date = datetime.datetime.strptime(
            n.get('NOTICEDATE').split('T')[0], '%Y-%m-%d')
        notice_url = single_notice.format(stock_info.stock_number, notice_code)
        if not is_exists(notice_code):
            StockNotice(title=notice_title,
                        code=notice_code,
                        date=notice_date,
                        content_url=notice_url,
                        stock_number=stock_info.stock_number,
                        stock_name=stock_info.stock_name).save()
def check_duplicate(notice_title, notice_cate, notice_date):
    """Return True when a StockNotice with this title, category and date exists.

    The queryset's truthiness already expresses existence, so the verbose
    ``if cursor: return True / else: return False`` is collapsed to ``bool``.
    """
    cursor = StockNotice.objects(Q(notice_title=notice_title) &
                                 Q(notice_cate=notice_cate) &
                                 Q(notice_date=notice_date))
    return bool(cursor)
def is_exists(notice_code):
    """Return True when a StockNotice with *notice_code* is already stored.

    Uses the queryset's truthiness directly instead of the verbose
    ``if cursor: return True / else: return False`` pattern.
    """
    return bool(StockNotice.objects(code=notice_code))
def check_duplicate(notice_title, notice_date):
    """Return True when a StockNotice with this title and date exists.

    The queryset's truthiness already expresses existence, so the verbose
    ``if cursor: return True / else: return False`` is collapsed to ``bool``.
    """
    cursor = StockNotice.objects(
        Q(notice_title=notice_title) & Q(notice_date=notice_date))
    return bool(cursor)
def query_stock_notice(date, keyword=u'购买理财产品'): sn = SN.objects(Q(notice_title__contains=keyword) & Q(notice_date__gte=date)) stocks = list(set([(i.stock_number, i.stock_name) for i in sn])) stocks.sort() print len(stocks) for k, v in trading_market.iteritems(): filtered_stocks = [s for s in stocks if s[0].startswith(k)] print '---------%s---%s---------' % (v, len(filtered_stocks)) for i in filtered_stocks: print i[0], i[1]
def query_stock_notice(date, keyword=u'购买理财产品'): sn = SN.objects( Q(notice_title__contains=keyword) & Q(notice_date__gte=date)) stocks = list(set([(i.stock_number, i.stock_name) for i in sn])) stocks.sort() print len(stocks) for k, v in trading_market.iteritems(): filtered_stocks = [s for s in stocks if s[0].startswith(k)] print '---------%s---%s---------' % (v, len(filtered_stocks)) for i in filtered_stocks: print i[0], i[1]
def collect_notice(stock_info):
    """Fetch the JSONP notice feed for *stock_info* and persist unseen notices.

    The endpoint returns a JavaScript assignment of the form
    ``var xxx = {...};``.  The JSON object is extracted by slicing between
    the first '{' and the last '}' rather than blanket-replacing
    'var'/'='/';' across the whole response, which would also corrupt any
    notice title containing those substrings and break ``json.loads``.
    """
    raw = send_request(company_notice.format(stock_info.stock_number))
    start = raw.find('{')
    end = raw.rfind('}')
    # No JSON object in the response (error page, empty body): nothing to do.
    if start == -1 or end == -1:
        return
    notice_data = json.loads(raw[start:end + 1]).get('data', [])
    for n in notice_data:
        notice_title = n.get('NOTICETITLE')
        notice_code = n.get('INFOCODE')
        # NOTICEDATE looks like '2017-01-01T00:00:00'; keep the date part only.
        notice_date = datetime.datetime.strptime(
            n.get('NOTICEDATE').split('T')[0], '%Y-%m-%d')
        notice_url = single_notice.format(stock_info.stock_number, notice_code)
        if not is_exists(notice_code):
            StockNotice(title=notice_title,
                        code=notice_code,
                        date=notice_date,
                        content_url=notice_url,
                        stock_number=stock_info.stock_number,
                        stock_name=stock_info.stock_name).save()
def collect_event_notice(stock_number):
    """Return recent notices for *stock_number* that match a mining keyword.

    Looks back *time_interval* days from today, ordered by date; each notice
    contributes at most one entry (the first keyword hit counts).  Returns a
    list of dicts with 'url', 'date' and 'stock_number' keys.
    """
    # time_interval is only read here, so no `global` declaration is needed.
    cutoff = datetime.date.today() - datetime.timedelta(days=time_interval)
    recent = SN.objects(Q(stock_number=stock_number) &
                        Q(date__gte=cutoff)).order_by('date')
    matched = []
    for item in recent:
        if any(kw in item.title for kw in mining_keywords):
            matched.append({'url': item.content_url,
                            'date': item.date,
                            'stock_number': item.stock_number})
    return matched
def collect_event_notice(stock_number):
    """Return recent notices for *stock_number* that match a mining keyword.

    Looks back *time_interval* days from today, ordered by notice date.
    Returns a list of dicts with 'url', 'date' and 'stock_number' keys;
    each notice appears at most once.
    """
    # time_interval is only read here, so no `global` declaration is needed.
    today = datetime.date.today()
    delta = datetime.timedelta(days=time_interval)
    cursor = SN.objects(
        Q(stock_number=stock_number) &
        Q(notice_date__gte=today - delta)).order_by('notice_date')
    if not cursor:
        return []
    notice = []
    for n in cursor:
        for i in mining_keywords:
            if i in n.notice_title:
                notice.append({
                    'url': n.notice_url,
                    'date': n.notice_date,
                    'stock_number': n.stock_number
                })
                # Stop at the first matching keyword: without this break a
                # title containing several keywords was appended once per
                # keyword, producing duplicate entries (the sibling variant
                # of this function already breaks here).
                break
    return notice