def set_tag():
    """Replace the tag list of one of the current user's tabs.

    Expects a JSON request body of the form::

        {
            name: ['tagname', ...],
            tab: 'tabname'
        }

    Returns:
        ``{'tabs': [...]}`` with the user's tab list after the update,
        ``{'error': 'illegal format'}`` when the body is malformed, or
        ``{'error': 'no such tab'}`` when no tab has the given name.
    """
    try:
        data = json.loads(request.data)
        tags = data['name']
        tabname = data['tab']
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not valid JSON; KeyError: a field is missing;
        # TypeError: body is JSON but not an object.
        return {'error': 'illegal format'}

    # Validate explicitly rather than with `assert`, which is stripped
    # when the interpreter runs with -O.
    if not isinstance(tabname, basestring) or not isinstance(tags, list):
        return {'error': 'illegal format'}
    if not all(isinstance(tag, basestring) for tag in tags):
        return {'error': 'illegal format'}

    user = get_user(current_user.username)
    for tab in user['tab']:
        if tab['name'] == tabname:
            tab['tags'] = tags
            get_mongo('user').save(user)
            # Single-line literal: a backslash continuation inside the
            # string would embed the source indentation in the message.
            log_info('user {0} set tag to {1} on tab {2}'.format(
                current_user.username, tags, tabname))
            return {'tabs': user['tab']}
    return {'error': 'no such tab'}
def add_tab():
    """Add a new, empty tab to the current user.

    Expects a JSON request body of the form::

        {
            name: 'string'       # name of the tab
            priority: integer    # bigger, better. defaults to 0
        }

    Returns:
        ``{'success': 1}`` once the tab has been saved,
        ``{'error': 'illegal format'}`` when the body is malformed, or
        ``{'error': 'tab with this name already exists!'}`` when the user
        already has a tab with that name.
    """
    try:
        data = json.loads(request.data)
        name = data['name']
        priority = int(data.get('priority', 0))
    except (ValueError, KeyError, TypeError, OverflowError):
        # ValueError: invalid JSON or non-numeric priority; KeyError: no
        # name; TypeError: body is not an object or priority is not
        # convertible; OverflowError: priority is an infinite float.
        return {'error': 'illegal format'}

    # Explicit check instead of `assert`, which is stripped under -O.
    if not isinstance(name, basestring):
        return {'error': 'illegal format'}

    username = current_user.username
    doc = get_user(username)
    if any(tab['name'] == name for tab in doc['tab']):
        return {'error': 'tab with this name already exists!'}

    doc['tab'].append({
        'name': name,
        'priority': priority,
        'tags': [],
    })
    get_mongo('user').save(doc)
    log_info('user {0} add tab {1}'.format(username, name))
    return {'success': 1}
def del_tag():
    """Remove one tag from one of the current user's tabs.

    GET /del_tag?name=xxx&tab=xxx

    Returns:
        ``{'tabs': [...]}`` with the user's tab list after the removal,
        ``{'error': 'illegal format'}`` when a query argument is missing,
        ``{'error': 'tag ... not in tab ...'}`` when the tab does not
        contain the tag, or ``{'error': 'no such tab'}`` when no tab has
        the given name.
    """
    try:
        tagname = request.args['name']
        tabname = request.args['tab']
    except KeyError:
        return {'error': 'illegal format'}

    # Explicit check instead of `assert`, which is stripped under -O.
    if not (isinstance(tagname, basestring)
            and isinstance(tabname, basestring)):
        return {'error': 'illegal format'}

    user = get_user(current_user.username)
    for tab in user['tab']:
        if tab['name'] != tabname:
            continue
        try:
            tab['tags'].remove(tagname)
        except ValueError:
            # list.remove raises ValueError when the tag is absent.
            return {'error': 'tag {0} not in tab {1}'.format(tagname,
                                                             tabname)}
        get_mongo('user').save(user)
        # Single-line literal: a backslash continuation inside the string
        # would embed the source indentation in the message.
        log_info('user {0} del tag {1} in tab {2}'.format(
            current_user.username, tagname, tabname))
        return {'tabs': user['tab']}
    return {'error': 'no such tab'}
def start_server():
    """Start the server for all general fetchers.

    Imports all modules via ``import_all_modules``, then spawns one gevent
    greenlet per entry of ``register_fetcher.fetcher_list`` and joins them.
    Logs an error and returns immediately when that list is empty.
    """
    import_all_modules(__file__, __name__)
    fetchers = register_fetcher.fetcher_list
    if not fetchers:
        uklogger.log_err('no general fetcher available, exit')
        return

    def worker(fetcher):
        # Run the fetcher forever, pausing fetcher.sleep_time between runs.
        try:
            while True:
                run_msg = 'run fetcher {}'.format(fetcher.fetcher_name)
                uklogger.log_info(run_msg)
                fetcher.run()
                gevent.sleep(fetcher.sleep_time)
        except KeyboardInterrupt:
            exit_msg = 'worker {} got KeyboardInterrupt, exit'.format(
                fetcher.fetcher_name)
            uklogger.log_info(exit_msg)

    jobs = [gevent.spawn(worker, fetcher) for fetcher in fetchers]
    try:
        for job in jobs:
            job.join()
    except KeyboardInterrupt:
        uklogger.log_info('got KeyboardInterrupt, exit')
def register():
    """User registration api.

    Reads two request values:

        username: string
        password: string

    Both must be at least 3 characters long and the username must not
    already exist.

    Returns:
        ``{'success': 1}`` after the user document has been inserted, or
        ``{'error': ...}`` describing why registration was rejected.

    XXX TODO
    """
    try:
        username = request.values['username']
        password = request.values['password']
    except KeyError:
        return {'error': 'illegal format'}

    # Explicit check instead of `assert`, which is stripped under -O.
    if not (isinstance(username, basestring)
            and isinstance(password, basestring)):
        return {'error': 'illegal format'}

    if len(username) < 3 or len(password) < 3:
        return {'error': 'length of username and password must'
                         ' be at least 3 characters'}

    if get_user(username):
        return {"error": "user {0} already exists".format(username)}

    # NOTE(review): the password is stored exactly as received. Hashing it
    # here would need a matching change in the login code, which is not
    # visible from this function -- confirm and fix both together.
    get_mongo('user').insert({
        'username': username,
        'password': password,
        'tab': [],
    })
    # Never write the password to the log.
    log_info('new user: {0}'.format(username))
    return {'success': 1}
def add_tag():
    """Add one tag to one of the current user's tabs.

    Expects a JSON request body of the form::

        {
            name: 'tagname',
            tab: 'tabname'
        }

    The tab's tag list is de-duplicated; existing tags keep their order
    and a new tag is placed at the end.

    Returns:
        ``{'tabs': [...]}`` with the user's tab list after the update,
        ``{'error': 'illegal format'}`` when the body is malformed, or
        ``{'error': 'no such tab'}`` when no tab has the given name.
    """
    try:
        data = json.loads(request.data)
        tagname = data['name']
        tabname = data['tab']
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not valid JSON; KeyError: a field is missing;
        # TypeError: body is JSON but not an object.
        return {'error': 'illegal format'}

    # Explicit check instead of `assert`, which is stripped under -O.
    if not (isinstance(tagname, basestring)
            and isinstance(tabname, basestring)):
        return {'error': 'illegal format'}

    user = get_user(current_user.username)
    for tab in user['tab']:
        if tab['name'] != tabname:
            continue
        # De-duplicate while keeping first-seen order; list(set(...))
        # would reorder the tags arbitrarily.
        seen = set()
        unique_tags = []
        for tag in tab['tags'] + [tagname]:
            if tag not in seen:
                seen.add(tag)
                unique_tags.append(tag)
        tab['tags'] = unique_tags
        get_mongo('user').save(user)
        # Single-line literal: a backslash continuation inside the string
        # would embed the source indentation in the message.
        log_info('user {0} add tag {1} to tab {2}'.format(
            current_user.username, tagname, tabname))
        return {'tabs': user['tab']}
    return {'error': 'no such tab'}
def worker(fetcher):
    """Run ``fetcher`` in an endless loop until KeyboardInterrupt.

    Each iteration logs the fetcher name, calls ``fetcher.run()`` and then
    sleeps ``fetcher.sleep_time`` via ``gevent.sleep``.
    """
    try:
        while True:
            run_msg = 'run fetcher {}'.format(fetcher.fetcher_name)
            uklogger.log_info(run_msg)
            fetcher.run()
            gevent.sleep(fetcher.sleep_time)
    except KeyboardInterrupt:
        exit_msg = 'worker {} got KeyboardInterrupt, exit'.format(
            fetcher.fetcher_name)
        uklogger.log_info(exit_msg)
def xkcd_rss_fetcher(ctx):
    """Fetch xkcd.com/rss.xml and publish each new entry with tag `xkcd`.

    An entry counts as new when inserting its id as ``_id`` into the
    collection from ``ctx.get_mongo_collection()`` does not raise
    ``DuplicateKeyError``.
    """
    feed_url = 'http://www.xkcd.com/rss.xml'
    seen = ctx.get_mongo_collection()
    for item in fetch_rss(feed_url).entries:
        try:
            seen.insert({'_id': item.id})
        except DuplicateKeyError:
            # Already published on an earlier run.
            continue
        content = TextOnlyItem(item.title, item.description)
        published = parse_entry_time(item)
        ctx.new_item(content, ['xkcd'], published, {'id': item.id})
        log_info(u'xkcd rss: new entry: {} {}'.format(item.id, item.title))
def on_user_activated(user_id):
    """Run every fetcher listed for ``user_id``.

    Fetcher names come from ``get_db_set(user_id, 'fetcher')`` and are
    looked up in ``register_fetcher.fetcher_map``. Unknown names are logged
    as errors. Any exception is logged via ``uklogger.log_exc`` and not
    propagated.
    """
    try:
        for name in get_db_set(user_id, 'fetcher'):
            fetcher = register_fetcher.fetcher_map.get(name)
            if fetcher is not None:
                uklogger.log_info('run fetcher {} for user {}'.format(
                    name, user_id))
                fetcher.run(user_id)
                continue
            uklogger.log_err(
                'fetcher {} not exist, requested by user {}'.format(
                    name, user_id))
    except Exception as ex:
        uklogger.log_exc(ex)
def zhihu_rss_fetcher(ctx):
    """Fetch zhihu.com/rss and publish each new entry with tag `zhihu`.

    An entry counts as new when inserting its link as ``_id`` into the
    collection from ``ctx.get_mongo_collection()`` does not raise
    ``DuplicateKeyError``.
    """
    feed_url = 'http://www.zhihu.com/rss'
    seen = ctx.get_mongo_collection()
    for item in fetch_rss(feed_url).entries:
        try:
            seen.insert({'_id': item.link})
        except DuplicateKeyError:
            # Already published on an earlier run.
            continue
        content = TextOnlyItem(item.title, item.description)
        published = parse_entry_time(item)
        ctx.new_item(content, ['zhihu'], published, {'id': item.link})
        log_info(u'zhihu: new entry: {} {}'.format(item.link, item.title))
def stackoverflow_rss_fetcher(ctx):
    """Fetch stackoverflow.com/feeds and publish each new entry with tag
    `Stackoverflow`.

    An entry counts as new when inserting its id as ``_id`` into the
    collection from ``ctx.get_mongo_collection()`` does not raise
    ``DuplicateKeyError``.
    """
    feed_url = 'http://stackoverflow.com/feeds'
    seen = ctx.get_mongo_collection()
    for item in fetch_rss(feed_url).entries:
        try:
            seen.insert({'_id': item.id})
        except DuplicateKeyError:
            # Already published on an earlier run.
            continue
        content = TextOnlyItem(item.title, item.summary)
        published = parse_entry_time(item)
        ctx.new_item(content, ['Stackoverflow'], published, {'id': item.id})
        log_info(u'stackoverflow: new entry: {} {}'.format(item.id,
                                                           item.title))
def login():
    """Login api.

    Reads ``username`` and ``password`` from ``request.values``, builds a
    ``User`` from them and logs that user in.

    Returns:
        ``{'success': 1}`` after ``login_user`` has been called,
        ``{'error': 'illegal login format!...'}`` when a value is missing
        or not a string, or whatever ``User.get_error()`` returns when it
        is truthy.
    """
    try:
        username = request.values['username']
        password = request.values['password']
    except KeyError as e:
        return {'error': 'illegal login format!' + str(e)}

    # Explicit check instead of `assert`, which is stripped under -O. The
    # message matches the old assertion path, whose str(e) was empty.
    if not (isinstance(username, basestring)
            and isinstance(password, basestring)):
        return {'error': 'illegal login format!'}

    auth = User(username, password)
    err = auth.get_error()
    if err:
        return err
    login_user(auth)
    log_info('user {0} succesfully logged in'.format(username))
    return {'success': 1}
def quora_rss_fetcher(ctx):
    """Fetch quora.com/rss and publish each new entry with tag `Quora`.

    An entry counts as new when inserting its id as ``_id`` into the
    collection from ``ctx.get_mongo_collection()`` does not raise
    ``DuplicateKeyError``. The item body is ``entry.content[0].value``
    when available, otherwise ``entry.summary``.
    """
    URL = 'https://www.quora.com/rss'
    coll = ctx.get_mongo_collection()
    for entry in fetch_rss(URL).entries:
        try:
            coll.insert({'_id': entry.id})
        except DuplicateKeyError:
            # Already published on an earlier run.
            continue
        try:
            content = entry.content[0].value
        except (AttributeError, IndexError, KeyError, TypeError):
            # Entry has no usable content element. Only lookup failures
            # are caught, so KeyboardInterrupt/SystemExit still propagate.
            content = entry.summary
        ctx.new_item(TextOnlyItem(entry.title, content), ['Quora'],
                     parse_entry_time(entry), {'id': entry.id})
        log_info(u'quora rss: new entry: {} {}'.format(entry.id,
                                                       entry.title))
def tsinghua_portal_fetcher(ctx):
    """Fetch Tsinghua info and library notices and publish the new ones.

    Notices from ``Tsinghua.getInfoNotices()`` get the tag
    `Tsinghua info`; notices from ``Tsinghua.getLibNotices()`` get
    `Tsinghua library`. A notice counts as new when inserting its link as
    ``_id`` into the collection from ``ctx.get_mongo_collection()`` does
    not raise ``DuplicateKeyError``.
    """
    seen = ctx.get_mongo_collection()

    def publish(notices, tag, log_template):
        # Publish each unseen notice under `tag` and log it.
        for notice in notices:
            try:
                seen.insert({'_id': notice['link']})
            except DuplicateKeyError:
                continue
            ctx.new_item(TextOnlyItem(notice['title'], ""), [tag],
                         other={"id": notice['link']})
            log_info(log_template.format(notice['link'], notice['title']))

    # The library notices are requested only after the info notices have
    # been processed.
    publish(Tsinghua.getInfoNotices(), 'Tsinghua info',
            u'Tsinghua Info: new entry: {} {}')
    publish(Tsinghua.getLibNotices(), 'Tsinghua library',
            u'Tsinghua Library: new entry: {} {}')
def run(cls, ctx):
    """Fetch THU learn entries for ``ctx.user_id`` and publish new ones.

    Loads the user's config via ``cls.load_config``. When it is empty, a
    single failure item is published and nothing is fetched. Inside unit
    tests (``is_in_unittest()``) one synthetic entry is used instead of
    calling ``fetch``. A fetch failure is published as an item and logged
    via ``log_exc``.

    An entry counts as new when inserting ``str(ctx.user_id) + entry['id']``
    as ``_id`` into the collection from ``ctx.get_mongo_collection()`` does
    not raise ``DuplicateKeyError``.
    """
    conf = cls.load_config(ctx.user_id)
    if not conf:
        ctx.new_item(TextOnlyItem(u'网络学堂验证失败', ''), ['THU learn'])
        return

    coll = ctx.get_mongo_collection()
    if is_in_unittest():
        entries = [{'id': 'test-{}'.format(uuid.uuid4()),
                    'title': 'thu learn in testcase',
                    'content': '{}@{}'.format(conf['username'],
                                              conf['password']),
                    'create_time': '2013-12-14'}]
    else:
        try:
            entries = fetch(conf['username'], conf['password'])
        except Exception as e:
            # Debug prints removed: the failure is already reported to the
            # user as an item and recorded by log_exc.
            # NOTE(review): under Python 2, u'...' + str(e) may raise
            # UnicodeDecodeError for a non-ASCII message -- confirm.
            ctx.new_item(TextOnlyItem(u'网络学堂抓取失败:' + str(e), ''),
                         ['THU learn'])
            log_exc(e)
            return

    for entry in entries:
        try:
            # Prefix with the user id so the same entry can be recorded
            # once per user.
            coll.insert({'_id': str(ctx.user_id) + entry['id']})
        except DuplicateKeyError:
            continue
        ctx.new_item(
            TextOnlyItem(entry['title'], entry['content']),
            ['THU learn'],
            time.strptime(entry['create_time'], '%Y-%m-%d'),
            {'id': entry['id']})
        log_info(u'THU learn: new entry: {} {}'.format(entry['id'],
                                                       entry['title']))
def auto_tagging(ctx, doc):
    """Auto tag an item.

    The tagger model is loaded from `ukconfig.tagger_path` on first use
    and kept in the module-level `_tagger`. If loading raises IOError the
    function logs that and returns without touching `doc`. Otherwise the
    predicted tags are declared via `declare_tag` and merged, without
    duplicates, into `doc['tag']`. `ctx` is not used here.
    """
    global _tagger
    if _tagger is None:
        try:
            log_info('loading tagger ...')
            _tagger = TextTagger.load(ukconfig.tagger_path)
        except IOError:
            log_info('tagger model not found.')
            return

    text = doc['desc'].render_content()
    predicted = _tagger.predict_one(text)
    declare_tag(predicted)
    log_info('original tag: ' + str(doc['tag']))
    log_info('autotagging: ' + str(predicted))
    merged = set(doc['tag'] + predicted)
    doc['tag'] = list(merged)
def logout():
    """Logout api: log the event, call ``logout_user`` and report success."""
    # Read the name before logout_user() runs.
    username = current_user.username
    log_info('user {0} logged out'.format(username))
    logout_user()
    return {'success': 1}
def bitcoin_fetcher(ctx):
    """Publish the price returned by ``getPrice()`` as an item with tag
    `Bitcoin`, then log it."""
    price = getPrice()
    headline = "Last Bitcoin Price: " + u"¥" + str(price)
    item = TextOnlyItem(headline, "")
    ctx.new_item(item, ['Bitcoin'])
    log_info(u'bitcoin price update: {}'.format(price))