def announcement():
    """Render the announcement listing page with type/exchange filters.

    Query-string parameters:
        type     -- announcement type ('offer', 'result', 'stock'); empty = all
        exchange -- exchange abbreviation to filter by; empty = all
        page     -- 1-based page number (defaults to 1)

    NOTE: the template receives ``**locals()``, so every local name below is
    part of the template contract.
    """
    nav = 'announcement'
    tab = 'raw'
    type_ = request.args.get('type', '')
    typecn = type_to_cn(type_)
    exchange = request.args.get('exchange', '')
    page = int(request.args.get('page', 1) or 1)
    limit = 50
    skip = limit * (page - 1)
    cond = {}
    if type_:
        cond['type_'] = type_
    if exchange:
        cond['exchange'] = exchange
    total = Announcement.count(cond)
    pagination = Pagination(page, limit, total)
    exchanges = sorted(e.abbr for e in Exchange.query())
    types = ['offer', 'result', 'stock']
    announcements = list(
        Announcement.query(cond, sort=[('updated_at', -1)],
                           skip=skip, limit=limit))
    for a in announcements:
        a.typecn = type_to_cn(a.type_)
    # The newest exchange record's timestamp doubles as "last updated".
    ex = Exchange.query_one(sort=[('updated_at', -1)])
    updated_at = None if not ex else ex.updated_at
    # BUG FIX: the old code did ``locals()['type_to_cn'] = type_to_cn`` —
    # writing into locals() is not guaranteed to create a local in CPython,
    # and the later locals() call takes a fresh snapshot, so the template
    # could silently miss `type_to_cn`.  Pass it explicitly instead.
    return render_template('frontend/announcement.html',
                           type_to_cn=type_to_cn, **locals())
def parse():
    """Admin view: show parse progress and, when ``?url=`` is given, the
    announcement at that URL together with its already-extracted collections.

    NOTE: the template receives ``**locals()``, so every local name here is
    part of the template contract — don't rename or remove variables casually.
    """
    nav = 'parse'
    url = request.args.get('url')
    # Progress counters over the announcement types that can be parsed.
    num_parsed = Announcement.count({
        'parsed': True,
        'type_': {
            '$in': ['offer', 'result']
        }
    })
    num_total = Announcement.count({'type_': {'$in': ['offer', 'result']}})
    if url:
        announcement = Announcement.query_one({'url': url})
        colls = list(Collection.query({'from_url': url}))
        for coll in colls:
            # Pre-format values for display in the template.
            if coll.offers_at:
                coll.offers_at2 = coll.offers_at.strftime('%Y%m%d')
            if coll.offer_cash_ratio:
                # e.g. 0.5 -> "50%"
                coll.offer_cash_ratio = '{:2.0f}%'.format(
                    coll.offer_cash_ratio * 100)
            if coll.offer_price:
                # Drop a trailing ".0" so whole-number prices render cleanly.
                coll.offer_price = str(coll.offer_price)
                if coll.offer_price.endswith('.0'):
                    coll.offer_price = coll.offer_price[:-2]
    all_done = num_parsed == num_total
    return render_template('admin/parse.html', **locals())
def parse_index(ex, type_, content, conf):
    """Scan one exchange index page and upsert any unseen announcements.

    :param ex: exchange record (provides ``abbr`` and ``_id``)
    :param type_: announcement category to tag new records with
    :param content: raw bytes of the fetched index page
    :param conf: per-exchange config dict with 'encoding', a 'detail' regex,
                 'fields' names, and optionally a 'relative' URL prefix
    """
    text = content.decode(conf['encoding'], 'ignore')
    detail_re = re.compile(conf['detail'], re.DOTALL)
    tag_or_space = r'(</?[a-zA-Z]+>|\s+)'
    for match in detail_re.findall(text):
        record = {}
        for field, raw_value in zip(conf['fields'], match):
            # Strip markup tags and whitespace from every captured field.
            record[field] = re.sub(tag_or_space, '', raw_value.strip())
        if 'relative' in conf and not record['url'].startswith('http'):
            record['url'] = conf['relative'] + record['url']
        # Already crawled: skip.
        if Announcement.query_one({'url': record['url']}):
            continue
        # Exchange-specific cleanups of the published-at text.
        if ex.abbr == '中港邮币卡':
            record['published_at'] = re.sub('<[^>]*>', '-',
                                            record['published_at'])
        if ex.abbr == '三点零':
            stripped = re.sub('<[^>]*>', '', record['published_at'])
            record['published_at'] = stripped[2:] + '/' + stripped[:2]
        # Sites publish Beijing time (UTC+8); store as UTC.
        record['published_at'] = (parse_datetime(record['published_at'])
                                  - timedelta(hours=8))
        record['exchange'] = ex._id
        record['type_'] = type_
        page_bytes = session.get(record['url'], timeout=(5, 10)).content
        record['html'] = page_bytes.decode(conf['encoding'], 'ignore')
        # Rewrite the declared charset so the stored page renders as UTF-8.
        record['html'] = record['html'].replace(conf['encoding'], 'utf-8')
        log.info('[{exchange}]{published_at}: {title}'.format(**record))
        Announcement(record).upsert()
def parse_index(ex, type_, content, conf):
    """Parse one exchange index page and store announcements not seen before.

    ``ex`` supplies ``abbr`` and ``_id``; ``conf`` supplies the page
    'encoding', the 'detail' extraction regex, the captured 'fields' names
    and optionally a 'relative' URL prefix.
    """
    text = content.decode(conf['encoding'], 'ignore')
    for values in re.compile(conf['detail'], re.DOTALL).findall(text):
        # Strip markup tags and whitespace from every captured field.
        d = {key: re.sub(r'(</?[a-zA-Z]+>|\s+)', '', value.strip())
             for key, value in zip(conf['fields'], values)}
        if 'relative' in conf and not d['url'].startswith('http'):
            d['url'] = conf['relative'] + d['url']
        # Already crawled: skip.
        if Announcement.query_one({'url': d['url']}):
            continue
        # Exchange-specific cleanups of the published-at text.
        if ex.abbr == '中港邮币卡':
            d['published_at'] = re.sub('<[^>]*>', '-', d['published_at'])
        if ex.abbr == '三点零':
            pa = d['published_at']
            pa = re.sub('<[^>]*>', '', pa)
            d['published_at'] = pa[2:] + '/' + pa[:2]
        # Sites publish Beijing time (UTC+8); store as UTC.
        d['published_at'] = parse_datetime(d['published_at']) \
            - timedelta(hours=8)
        d['exchange'] = ex._id
        d['type_'] = type_
        content = session.get(d['url'], timeout=(5, 10)).content
        d['html'] = content.decode(conf['encoding'], 'ignore')
        # Rewrite the declared charset so the stored page renders as UTF-8.
        d['html'] = d['html'].replace(conf['encoding'], 'utf-8')
        log.info('[{exchange}]{published_at}: {title}'.format(**d))
        Announcement(d).upsert()
def announcement_feed():
    """Serve an Atom feed of the 20 most recently updated announcements,
    optionally filtered by ``?type=`` and ``?exchange=``."""
    def to_beijing_date(dt):
        # Stored datetimes are UTC; render as a Beijing (UTC+8) date string.
        from datetime import timedelta
        return (dt + timedelta(hours=8)).strftime('%Y年%m月%d日')

    type_ = request.args.get('type', '')
    typecn = type_to_cn(type_)
    exchange = request.args.get('exchange', '')

    cond = {}
    title_parts = ['邮币卡公告聚合']
    if type_:
        cond['type_'] = type_
        title_parts.append(typecn)
    if exchange:
        cond['exchange'] = exchange
        title_parts.append(exchange)
    feedtitle = ' - '.join(title_parts)

    feed = AtomFeed(feedtitle, feed_url=request.url, url=request.url_root)
    recent = Announcement.query(cond, sort=[('updated_at', -1)], limit=20)
    for item in recent:
        feed.add('{} {}'.format(to_beijing_date(item.published_at),
                                item.title.strip()),
                 '更多内容请点击标题连接',
                 content_type='text',
                 author=item.exchange,
                 url=item.url,
                 updated=item.updated_at,
                 published=item.published_at)
    return feed.get_response()
def index():
    """Render the front page: exchange list plus the latest announcements.

    NOTE: the template receives ``**locals()``, so local names are part of
    the template contract.
    """
    nav = 'index'
    exchanges = CONFS
    # A section's 'index' may be a single URL or a list of URLs; normalize
    # to one 'index_url' (first of the list).  The original duplicated this
    # if/else for 'offer' and 'result' — fold it into one loop.
    for e in exchanges:
        for section in ('offer', 'result'):
            urls = e[section]['index']
            e[section]['index_url'] = urls[0] if isinstance(urls, list) else urls
    announcements = list(Announcement.query(
        sort=[('published_at', -1)], limit=len(exchanges)))
    for a in announcements:
        # Replace the internal type code with its Chinese label for display.
        a.type_ = {
            'offer': '申购',
            'result': '中签',
            'stock': '托管',
        }.get(a.type_, '托管')
    return render_template('frontend/index.html', **locals())
def parse_findone():
    """Jump to the parse page for the newest still-unparsed announcement.

    Falls back to the bare parse page when everything is already parsed.
    """
    pending = Announcement.query_one(
        {"parsed": False, "type_": {"$in": ["offer", "result"]}},
        sort=[("published_at", -1)],
    )
    if not pending:
        return redirect(url_for("admin.parse"))
    return redirect(url_for("admin.parse", url=pending.url))
def parse(): nav = "parse" url = request.args.get("url") num_parsed = Announcement.count({"parsed": True, "type_": {"$in": ["offer", "result"]}}) num_total = Announcement.count({"type_": {"$in": ["offer", "result"]}}) if url: announcement = Announcement.query_one({"url": url}) colls = list(Collection.query({"from_url": url})) for coll in colls: if coll.offers_at: coll.offers_at2 = coll.offers_at.strftime("%Y%m%d") if coll.offer_cash_ratio: coll.offer_cash_ratio = "{:2.0f}%".format(coll.offer_cash_ratio * 100) if coll.offer_price: coll.offer_price = str(coll.offer_price) if coll.offer_price.endswith(".0"): coll.offer_price = coll.offer_price[:-2] all_done = num_parsed == num_total return render_template("admin/parse.html", **locals())
def parse_save():
    """Persist manually parsed collection rows posted from the admin UI.

    Form fields: exchange, status, from_url, type ('offer'/'result') and
    result (a JSON-encoded list of collection dicts).  On success the source
    announcement is marked as parsed.  Returns JSON {status: 200} or
    {status: 500, reason} when required fields are missing.
    """
    exchange = request.form.get('exchange')
    status = request.form.get('status', '申购中')
    from_url = request.form.get('from_url')
    type_ = request.form.get('type')
    # BUG FIX: the default must be the *string* '[]' — json.loads() rejects
    # a list object, so a missing 'result' field used to raise TypeError.
    result = json.loads(request.form.get('result', '[]'))
    if not exchange:
        return jsonify(status=500, reason="字段不全")
    from_url = html.unescape(from_url)
    if type_ == 'offer':
        for coll in result:
            coll['exchange'] = exchange
            coll['status'] = status
            coll['from_url'] = from_url
            coll['offer_quantity'] = int(coll['offer_quantity'])
            coll['offer_price'] = float(coll['offer_price'])
            coll['offers_at'] = datetime.strptime(coll['offers_at'], '%Y%m%d')
            # e.g. '50%' -> 0.5
            coll['offer_cash_ratio'] = int(coll['offer_cash_ratio'].replace(
                '%', '')) / 100.
            Collection(coll).upsert()
        Announcement.update_one({'_id': from_url}, {'$set': {'parsed': True}})
    elif type_ == 'result':
        for coll in result:
            coll['exchange'] = exchange
            coll['status'] = status
            coll['from_url'] = from_url
            if coll.get('invest_cash'):
                coll['invest_cash'] = float(coll['invest_cash'])
            else:
                # BUG FIX: plain `del` raised KeyError when the key was
                # absent; pop() tolerates missing/empty values alike.
                coll.pop('invest_cash', None)
            if coll.get('invest_cash_return_ratio'):
                coll['invest_cash_return_ratio'] = float(
                    coll['invest_cash_return_ratio'].replace('%', '')) / 100.
            else:
                coll.pop('invest_cash_return_ratio', None)
            Collection(coll).upsert()
        Announcement.update_one({'_id': from_url}, {'$set': {'parsed': True}})
    return jsonify(status=200)
def parse_save():
    """Save hand-parsed collection rows submitted by the admin parse page.

    Expects form fields: exchange, status, from_url, type ("offer"/"result")
    and result (JSON-encoded list of collection dicts).  Marks the source
    announcement parsed afterwards.  Responds with JSON status codes.
    """
    exchange = request.form.get("exchange")
    status = request.form.get("status", "申购中")
    from_url = request.form.get("from_url")
    type_ = request.form.get("type")
    # BUG FIX: default must be the JSON *string* "[]" — json.loads() cannot
    # take a list, so a missing "result" field previously raised TypeError.
    result = json.loads(request.form.get("result", "[]"))
    if not exchange:
        return jsonify(status=500, reason="字段不全")
    from_url = html.unescape(from_url)
    if type_ == "offer":
        for coll in result:
            coll["exchange"] = exchange
            coll["status"] = status
            coll["from_url"] = from_url
            coll["offer_quantity"] = int(coll["offer_quantity"])
            coll["offer_price"] = float(coll["offer_price"])
            coll["offers_at"] = datetime.strptime(coll["offers_at"], "%Y%m%d")
            # e.g. "50%" -> 0.5
            coll["offer_cash_ratio"] = int(coll["offer_cash_ratio"].replace("%", "")) / 100.0
            Collection(coll).upsert()
        Announcement.update_one({"_id": from_url}, {"$set": {"parsed": True}})
    elif type_ == "result":
        for coll in result:
            coll["exchange"] = exchange
            coll["status"] = status
            coll["from_url"] = from_url
            if coll.get("invest_cash"):
                coll["invest_cash"] = float(coll["invest_cash"])
            else:
                # BUG FIX: `del` on an absent key raised KeyError; pop()
                # handles missing and empty values uniformly.
                coll.pop("invest_cash", None)
            if coll.get("invest_cash_return_ratio"):
                coll["invest_cash_return_ratio"] = float(coll["invest_cash_return_ratio"].replace("%", "")) / 100.0
            else:
                coll.pop("invest_cash_return_ratio", None)
            Collection(coll).upsert()
        Announcement.update_one({"_id": from_url}, {"$set": {"parsed": True}})
    return jsonify(status=200)
def parse_findone():
    """Redirect to the parse page preloaded with the most recently published
    announcement (offer/result types) that has not been parsed yet."""
    announcement = Announcement.query_one(
        {
            'parsed': False,
            'type_': {
                '$in': ['offer', 'result']
            }
        },
        sort=[('published_at', -1)])
    if announcement:
        return redirect(url_for('admin.parse', url=announcement.url))
    else:
        # Nothing left to parse: go to the bare parse page.
        return redirect(url_for('admin.parse'))
def parse(site):
    """Parse every unparsed announcement of one exchange site.

    :param site: parser module name under ``ybk.parsers`` (a value in ABBRS;
                 the matching key is the exchange abbreviation)
    Logs a per-site summary of how many announcements succeeded and failed.
    """
    rabbrs = {v: k for k, v in ABBRS.items()}
    abbr = rabbrs[site]
    # importlib.import_module is the documented way to import by dotted
    # name; the old importlib.__import__(..., fromlist=['Parser']) trick
    # returned the same module but relies on an internal detail.
    parser = importlib.import_module('ybk.parsers.{}'.format(site)).Parser()
    log.info('解析交易所 {}'.format(abbr))
    num_parsed = 0
    num_failed = 0
    for a in Announcement.query({'exchange': abbr, 'parsed': {'$ne': True}}):
        log.info('parsing {}'.format(a.url))
        try:
            for c in parser.parse(a.type_, a.html):
                c['from_url'] = a.url
                Collection(c).upsert()
            a.update({'$set': {'parsed': True}})
            num_parsed += 1
        except Exception as e:
            # Best-effort batch job: count the failure and move on, but
            # surface unexpected errors (NotImplementedError just means the
            # parser doesn't support this announcement type yet).
            num_failed += 1
            if not isinstance(e, NotImplementedError):
                log.exception('解析错误')
            continue
    log.info('解析完毕, {}个成功, {}个失败'.format(num_parsed, num_failed))
def parse_remove():
    """Delete the announcement whose ``_id`` is posted in the form;
    always answers with JSON ``{status: 200}``."""
    Announcement.delete_one({"_id": request.form.get("_id")})
    return jsonify(status=200)
def parse_remove():
    """Delete one announcement by its posted '_id' form field."""
    _id = request.form.get('_id')
    Announcement.delete_one({'_id': _id})
    return jsonify(status=200)