def _cond_update_url_info(cond, update_map, inc_map=None, fields=None):
    '''
    Conditionally update a URL record, splitting the update between the main
    and meta repositories.

    Notes:
        fields are just fields from urlRepository, while update_map/inc_map
        can include metaUrlRepository fields. updates for metaUrlRepository
        fields just support async mode.

    Args:
        cond: query condition selecting the url_info document.
        update_map: "$set"-style field/value updates (may span both repos).
        inc_map: optional "$inc"-style counter updates.
        fields: projection of urlRepository fields to return; defaults to
            ["_id"] (default expressed via None to avoid a mutable default
            argument).

    Returns:
        The matched urlRepository document (projected by fields), or the
        result of find_and_modify when a main-repo update applies.
    '''
    # Avoid the mutable-default-argument pitfall: materialize the default here.
    if fields is None:
        fields = ["_id"]
    fields = make_fields(fields)
    first_update, second_update = _make_update(update_map, inc_map)
    # second_update targets metaUrlRepository; apply it via the meta DB helper.
    if second_update is not None:
        crawlerMetadb.update_url_info_meta(cond, second_update)
    if first_update is not None:
        # NOTE(review): find_and_modify is the legacy PyMongo 2.x API
        # (removed in 3.0) — confirm the driver version before migrating.
        return db.urlRepository.find_and_modify(cond, first_update, fields=fields)
    # Nothing to change in the main repo: just read the current document.
    return _cond_get_url_info(cond, fields)
def get_url_info_meta(url, fields):
    '''Fetch the meta url_info document for *url*, projected to *fields*.'''
    projection = make_fields(fields)
    query = default_cond(url)
    return _cond_get_url_info_meta(query, projection)
def get_url_infos(cond, fields):
    '''Return a cursor over urlRepository documents matching *cond*,
    projected to *fields*.'''
    projection = make_fields(fields)
    return db.urlRepository.find(cond, fields=projection)
def get_raw_docs_by_statuses(statuses, fields):
    '''Return a cursor over rawDocs whose process_status is in *statuses*,
    projected to *fields*.'''
    projection = make_fields(fields)
    query = {"process_status": {"$in": statuses}}
    return db.rawDocs.find(query, fields=projection)
def get_url_infos_by_statuses(statuses, fields):
    '''Return a cursor over urlRepository documents whose crawl_status is in
    *statuses*, projected to *fields*.'''
    projection = make_fields(fields)
    query = {"crawl_status": {"$in": statuses}}
    return db.urlRepository.find(query, fields=projection)
def _cond_get_url_info(cond, fields):
    '''Return a single urlRepository document matching *cond*, projected to
    *fields* (None if no match).'''
    projection = make_fields(fields)
    return db.urlRepository.find_one(cond, fields=projection)