def after_path_related_record_deleted(bucket, record_data):
    """Remove every index entry tied to a deleted record's path.

    Clears the path->record-id map, the type-specific order index, the
    slash-depth index and both directions of the url<->path mapping,
    then lets storage delete the underlying file.
    """
    path = get_path_from_record(record_data)
    if not path:
        return
    id_db_name = get_bucket_name_for_path(bucket)
    url_db_name = get_bucket_name_for_url(bucket)
    slash_db_name = get_bucket_name_for_slash(bucket)
    order_db_name = get_bucket_name_for_order_by_record(bucket, record_data)
    # drop the path -> record-id entry
    hdel(id_db_name, path)
    # drop the per-data-type ordering entry, when this type has one
    if order_db_name:
        zdel(order_db_name, path)
    # drop the slash-depth entry
    zdel(slash_db_name, path)
    # the url index maps both directions; remove both sides
    url_path = get_url_path(record_data) or hget(url_db_name, path)
    if url_path:
        hdel(url_db_name, url_path)
    hdel(url_db_name, path)
    # finally delete the stored file via the storage layer
    storage.when_record_deleted(bucket, record_data)
def get_post_with_greed(url_body, parent_doc=None):
    """Resolve a post doc from a (possibly messy) url body, greedily.

    Resolution order:
    1. direct lookup of the url (query string / fragment stripped);
    2. for markdown urls, a path relative to the parent doc's folder,
       then the same filename prefixed with the current data root;
    3. a keyword (ES) search on the ``?name=`` query var, finally on
       the url's bare filename.
    Returns the matched doc, or a falsy value when nothing matched.
    """
    # drop query string and fragment before the direct lookup
    pure_url_body = re.split("[?#]", url_body)[0]
    post_url = pure_url_body
    d = get_data_namespace()
    post_doc = d.get_doc(post_url)
    current_data_root = get_current_data_root()
    parent_doc = parent_doc or get_doc_in_request()
    if not post_doc and is_a_markdown_file(post_url) and parent_doc and isinstance(parent_doc, dict):
        filename = post_url
        if "/post/" in filename:
            filename = filename.split("/post/", 1)[-1]
        parent_post_doc_path = get_path_from_record(parent_doc)
        if parent_post_doc_path:
            post_doc_parent = os.path.split(parent_post_doc_path)[0]
            if post_doc_parent:
                # try the filename as a sibling of the parent doc
                abs_path = "%s/%s" % (post_doc_parent.strip("/"), filename.strip("/"))
                post_doc = d.get_doc_by_path(abs_path)
                if current_data_root and not post_doc:
                    # add the wiki/data root as prefix and try again
                    abs_path = "%s/%s" % (current_data_root, filename.strip("/"))
                    post_doc = d.get_doc_by_path(abs_path)
    if not post_doc:
        # fall back to keyword (search-engine) matching
        bucket = get_bucket_in_request_context()
        post_name = (get_get_var(url_body, "name") or "").strip()
        if post_name:
            if "." in post_name:
                post_name = os.path.splitext(post_name)[0]
            post_doc = get_one_post_by_es(bucket, keywords=post_name, under=current_data_root)
            if not post_doc and is_a_markdown_file(post_url):
                # last resort: search by the url's bare filename
                just_post_file_name = get_just_name(post_url)
                if just_post_file_name != post_name:
                    post_doc = get_one_post_by_es(bucket, keywords=just_post_file_name, under=current_data_root)
    return post_doc
def get_doc_url_for_template_api(doc, url_prefix, url_root=None, hit_url_path=False):
    """Build the public url for a doc under a given prefix.

    When ``hit_url_path`` is set and the doc carries a non-empty
    ``url_path``, that path wins (post.url then behaves as if it had a
    dynamic url prefix).  Otherwise the doc path — made relative to
    ``url_root`` when possible — is appended to ``url_prefix``.
    Returns "" on invalid input.
    """
    arguments_ok = (
        doc and isinstance(doc, dict)
        and isinstance(url_prefix, string_types)
        and (not url_root or isinstance(url_root, string_types))
    )
    if not arguments_ok:
        return ""
    prefix = url_prefix.strip("/")
    doc_path = get_path_from_record(doc)
    if not doc_path:
        return ""
    if hit_url_path:
        explicit_url_path = smart_unicode(doc.get("url_path") or "").strip("/")
        if explicit_url_path:
            return "/%s/%s" % (prefix, explicit_url_path)
    if url_root and isinstance(url_root, string_types):
        relative_path = get_relative_path(doc_path.lower(), url_root.lower(), return_name_if_fail=False)
        if relative_path:
            return "/%s/%s" % (prefix, relative_path)
    return "/%s/%s" % (prefix, doc_path)
def get_wiki_url_for_doc(wiki_root, doc):
    """Return the /wiki/... url for a doc living under the given wiki root.

    Posts map to ``/wiki/post/<relative>``, anything else to
    ``/wiki/category/<relative>``.  Returns "" for invalid input or
    when the doc is not located under ``wiki_root``.
    """
    if not isinstance(wiki_root, string_types) or not isinstance(doc, dict):
        return ""
    wiki_root = wiki_root.strip("/")
    doc_type = get_type_from_record(doc)
    doc_path = get_path_from_record(doc)
    # fix: a record may carry no path — doc_path.lower() would raise
    # AttributeError on None (sibling url helpers guard the same way)
    if not doc_path:
        return ""
    relative_path = get_relative_path(doc_path.lower().strip("/"),
                                      wiki_root,
                                      return_name_if_fail=False)
    if not relative_path:
        return ""
    if doc_type == "post":
        return "/wiki/post/%s" % relative_path
    else:
        return "/wiki/category/%s" % relative_path
def update_path_related_when_record_changed(bucket, record_id, record_data):
    """Dispatch path-index maintenance when a record changes.

    NOTE: any record carrying a path field can be updated this way; a
    record that must never be deleted simply should not set that field.
    """
    if not isinstance(record_data, dict):
        return
    if not bucket or not record_id:
        return
    if not get_path_from_record(record_data):
        return
    if record_data.get('is_deleted', False):
        # deletions are usually incremental (an action=delete marker),
        # though some callers do remove the record outright
        after_path_related_record_deleted(bucket, record_data=record_data)
    else:
        # create or update
        after_path_related_record_created(bucket, record_id, record_data)
def update_record_order_value_to_related_db(bucket, record_data, force_value=None):
    """Write the record's sort score into its type-specific order index.

    Without an order value the record cannot be listed by the
    get_data(type) logic, so file/post/folder records fall back to the
    current timestamp.  ``force_value``, when given, overrides whatever
    the record carries.
    """
    raw_path = get_path_from_record(record_data)
    if not raw_path:
        return
    key_path = raw_path.strip('/')
    if not key_path:
        return
    order_db_name = get_bucket_name_for_order_by_record(bucket, record_data)
    record_type = get_data_type(record_data)
    score = record_data.get("_order") or record_data.get("order")
    if not score and record_type in ["file", "post", "folder"]:
        # should not normally happen; keep the record sortable anyway
        score = time.time()
    if score is not None:
        score = to_float(score, default_if_fail=None)
    if force_value is not None:
        score = force_value
    if score is not None and order_db_name:
        zset(order_db_name, key_path, score)
def render_as_static_file_for_farbox_bucket(path):
    """Serve the record stored at ``path`` in the current bucket as a static file.

    Returns a file response (compiled content when present, otherwise a
    storage download response), an abort(404) for blocked downloads, or
    None when there is no bucket/record to serve.
    """
    if not path or path == "/":
        path = "index.html"
    bucket = get_bucket_in_request_context()
    if not bucket:
        return
    record = get_record_by_path(bucket, path)
    if path == "favicon.ico" and not record:
        record = get_record_by_path(bucket, "_direct/favicon.ico")  # Bitcron compatibility
    if not record:
        return
    record_path = get_path_from_record(record)
    if record_path and record_path.startswith("/_data/"):
        # csv files under /_data are private unless the bucket owner is logged in
        ext = os.path.splitext(record_path)[-1].strip(".").lower()
        if ext in ["csv"] and not is_bucket_login(bucket):
            return abort(
                404, "csv under /_data is not allowed to download directly")
    set_context_value_from_request("is_static_file", True)
    if record.get('compiled_type') and record.get('compiled_content'):
        # pre-compiled content (e.g. scss/coffee output) is served directly
        raw_content = record.get('compiled_content')
        content_type = record.get('compiled_type')
        raw_content = to_bytes(raw_content)
        mimetype = content_type or guess_type(
            path) or 'application/octet-stream'
        compiled_file_response = send_file(io.BytesIO(to_bytes(raw_content)),
                                           mimetype=mimetype)
        return compiled_file_response
    else:
        # hot-link (anti-theft-chain) protection first
        site_configs = get_bucket_site_configs(bucket)
        anti_theft_chain = site_configs.get("anti_theft_chain", True)
        if anti_theft_chain and request.path.strip('/') in ['favicon.ico']:
            # favicons are always allowed cross-site
            anti_theft_chain = False
        if anti_theft_chain and request.referrer:
            refer_host = get_host_from_url(request.referrer)
            if refer_host != request.host and "." in request.path:
                return abort(404, "this url is not allowed for outside")
        return storage.get_download_response_for_record(bucket=bucket,
                                                        record_data=record,
                                                        try_resized_image=True)
def compute_content_with_referred_docs(post_doc, html_content=None, show_date=True, url_prefix=None, url_root=None, hit_url_path=False):
    """Rewrite the <a href> links in a post's html so referred docs resolve.

    Each anchor (optionally wrapped in a surrounding tag) is passed to
    re_sub_for_referred_docs together with the rendering options.
    Returns "" when the post has no path.
    """
    if not get_path_from_record(post_doc):
        return ""
    source = html_content or post_doc.get("content") or ""
    link_pattern = r"""(<[^<>]+>)?<a [^<>]*?href=['"](.*?)['"][^<>]*?>.*?</a>(</\w+>|<\w+ */>)?"""
    replacer = curry(
        re_sub_for_referred_docs,
        show_date=show_date,
        post_doc=post_doc,
        url_prefix=url_prefix,
        url_root=url_root,
        hit_url_path=hit_url_path,
    )
    return re.sub(link_pattern, replacer, source)
def auto_clean_record_before_handle_path_related_record(bucket, record_data):
    """Optionally purge the existing record at the same path before handling a new one.

    Applies only when the client-sent record has a path and requested
    ``_auto_clean_bucket``.  For deletions the old record and its
    related data are removed outright and 'break' is returned so the
    caller skips the rest of the pipeline; for updates the previous
    record at the same path is removed first and creation continues.
    """
    if not isinstance(record_data, dict):
        return  # ignore non-dict payloads
    is_deleted = record_data.get('is_deleted')
    path = get_path_from_record(record_data)
    auto_clean_bucket = record_data.get('_auto_clean_bucket', False)
    if not auto_clean_bucket:
        return
    if not path:
        return
    if is_deleted:
        # delete directly instead of storing a redundant tombstone record
        # NOTE(review): record_data is passed as the third positional arg
        # here, while the branch below passes continue_to_create_record=True
        # — confirm delete_path_related_record_by_path accepts both shapes.
        delete_path_related_record_by_path(bucket, path, record_data)
        return 'break'  # signal the caller to stop further handling
    else:
        # update: drop the previous record at this path; the new record
        # will be created by the subsequent pipeline step
        delete_path_related_record_by_path(bucket, path, continue_to_create_record=True)
def get_referred_back_docs(self, doc=None):
    """Return the docs that link back to (refer to) the given doc.

    Defaults to the current post in the request context; returns an
    empty list when no doc can be determined.
    """
    target = doc or self.get_current_post(auto_raise_404=False)
    if not target:
        return []
    return get_records_by_post_path_back_referred(self.bucket,
                                                  get_path_from_record(target))
def after_path_related_record_created(bucket, record_id, record_data):
    """Build/refresh every path-related index after a record is created or updated.

    Registers the path->record-id map (value encoded as
    ``[#]record_id,size,version``), the url<->path mapping, the order
    index, the slash-depth index (for listed types only) and the
    bucket-wide tags info.  Also auto-creates missing parent folder
    records, recursively.
    """
    path = get_path_from_record(record_data)
    if not path:
        return
    path = path.strip('/')
    if not path:
        return
    #original_path = to_unicode(record_data.get('path').strip('/'))
    # if the parent does not exist yet, create it too
    parent_path = os.path.split(path)[0].strip("/")
    if parent_path and not has_record_by_path(bucket, parent_path):
        # create the missing parent; this recurses upward so all
        # missing ancestors get created as well
        real_path = get_path_from_record(record_data, is_lower=False)
        real_parent_path = os.path.split(real_path)[0].strip("/")  # case preserved
        create_record_for_a_folder(bucket, real_parent_path)
    slash_number = path.count('/')
    bucket_name_for_path = get_bucket_name_for_path(bucket)
    bucket_name_for_url = get_bucket_name_for_url(bucket)
    bucket_name_for_slash = get_bucket_name_for_slash(bucket)
    #bucket_name_for_order = get_bucket_name_for_order_by_record(bucket, record_data)
    to_mark_object_id = False
    data_type = get_data_type(record_data)
    if data_type == 'post' and record_data.get('status', 'public')!='public':
        # non-public posts get their object_id prefixed with '#'
        to_mark_object_id = True
    # url-matching index: a two-way mapping — besides finding the path
    # from a url, the url can be found from the path too (on deletion
    # only the path is available, not the url_path)
    url_path = get_url_path(record_data)
    if url_path:
        hset(bucket_name_for_url, url_path, path)
        if path != url_path:
            hset(bucket_name_for_url, path, url_path)
    # map path -> object_id
    if to_mark_object_id:
        # hidden from plain listings, but still resolvable by exact path
        value = '#%s'%record_id
    else:
        value = record_id
    # also record version & size: record_id,size,version
    size = record_data.get('size') or 0
    version = record_data.get('version') or ''
    if record_data.get('is_dir'):
        version = 'folder'
    value = '%s,%s,%s' % (value, size, version)
    hset(bucket_name_for_path, path, value)
    # set the sort order; without it, get_data(type) cannot list the record.
    # non-public posts are forced to order=0 so they sort last under the
    # default -date ordering when paths are paginated externally
    update_record_order_value_to_related_db(bucket, record_data,
                                            force_value=0 if to_mark_object_id else None)
    # bind the slash (depth) number to the path — only for listed types
    if data_type in BUCKET_RECORD_SLASH_TYPES:
        zset(bucket_name_for_slash, path, score=slash_number)
    update_tags_info_for_posts(bucket=bucket, record_data=record_data)  # files and posts info
def show_albums_as_sub_site():
    """Render the /album sub-site (album list or one album's images).

    Only activates when the current bucket configures ``albums_root``
    and the request path matches ``album(/…)``.  /album lists folders
    under the root; /album/<name> lists images in that folder.  Returns
    the rendered waterfall template, or None when not applicable.
    """
    bucket = get_bucket_in_request_context()
    if not bucket:
        return
    request_path = get_request_path().strip("/")
    if not re.match("album(/|$)", request_path):
        return
    if "." in request_path and guess_type(
            request_path, default_type="").startswith("image/"):
        # likely a direct image url — don't let the album page swallow it
        return
    site_configs = get_bucket_site_configs(bucket)
    albums_root = smart_unicode(site_configs.get("albums_root", ""))
    if not albums_root:
        return
    albums_root = albums_root.strip("/")  # todo: allow setting "/" directly?
    albums_home_sort = site_configs.get("albums_home_sort", "-date")
    album_items_sort = site_configs.get("album_items_sort", "-date")
    page_title = site_configs.get("albums_title") or get_just_name(
        albums_root, for_folder=True)
    if re.match("album/?$", request_path):
        # album home: list the folders (one per album)
        doc_type = "folder"
        doc_sort = albums_home_sort
        under = albums_root
    else:
        # one album: list its images
        doc_type = "image"
        doc_sort = album_items_sort
        under = "%s/%s" % (albums_root,
                           request_path.split("/", 1)[-1].strip("/"))
        folder_doc = get_record_by_path(bucket=bucket, path=under)
        if folder_doc:
            page_title = folder_doc.get("title") or get_just_name(
                folder_doc.get("path"), for_folder=True)
        else:
            page_title = get_just_name(under, for_folder=True)
    if doc_sort not in ["date", "-date"]:
        doc_sort = "-date"
    limit = 15  # todo: make configurable?
    doc_level = 1
    # min_images_count = 1
    docs = Data.get_data(path=under,
                         type=doc_type,
                         limit=limit,
                         level=doc_level,
                         sort=doc_sort,
                         pager_name='album_docs_pager',
                         exclude='default')
    if doc_type == "folder":
        # attach each album's public url
        # NOTE(review): get_path_from_record could presumably return None
        # here, which would raise on .replace — confirm docs always have paths
        for doc in docs:
            doc_path = get_path_from_record(doc, is_lower=True)
            relative_path = doc_path.replace(albums_root.lower(), "",
                                             1).strip("/")
            doc["album_url"] = "/album/%s" % relative_path
    return render_api_template(
        "builtin_theme_album_waterfall.jade",
        docs=docs,
        page_title=page_title,
    )
def update_post_tags_words_info(bucket, record_data):
    """Maintain bucket-level posts info (tags, links, word counts) for one post.

    Updates three sub-structures of the bucket's posts info and persists
    them via set_bucket_configs:
    - 'tags':  {'paths': {path: [tag, ...]}, 'tags': {tag: [path, ...]}}
    - 'links': {'paths': {path: [linked_path, ...]},
                'links': {linked_path: [back_path, ...]}}
    - 'words': {path: text_words}, plus the bucket total 'text_words'.

    Deleted / draft / private posts are stripped from all three;
    otherwise the post's tags, outgoing doc links and word count are
    (re)registered.  Only markdown files outside _nav/ are handled.
    """
    path = get_path_from_record(record_data)
    # fix: guard before deriving lower_path — get_path_from_record can
    # return None, and path.lower() would raise AttributeError
    if not path:
        return
    lower_path = path.lower().lstrip('/')
    if not is_a_markdown_file(path):
        return
    if lower_path.startswith('_nav/'):
        return
    posts_info = get_bucket_posts_info(bucket) or {}
    # running total of words across the whole bucket
    bucket_text_words = to_int(posts_info.get('text_words') or 0,
                               default_if_fail=0)
    # tags info: {'paths': {path: [tag1, tag2]}, 'tags': {tag: [path1, path2]}}
    tags_info = posts_info.setdefault('tags', {})
    tags_info_tags = tags_info.setdefault('tags', {})
    tags_info_paths = tags_info.setdefault('paths', {})
    # links info: {'paths': {path: [linked, ...]}, 'links': {linked: [back, ...]}}
    links_info = posts_info.setdefault("links", {})
    links_info_links = links_info.setdefault("links", {})
    links_info_paths = links_info.setdefault("paths", {})
    words_info = posts_info.setdefault('words', {})  # {path: text_words}
    is_deleted = record_data.get('is_deleted', False)
    post_status = record_data.get('status') or 'public'
    post_tags = record_data.get('tags') or []
    if not isinstance(post_tags, (list, tuple)):
        post_tags = []
    post_doc_links, wiki_tags = get_linked_docs_from_markdown_content(
        path,
        record_data.get("raw_content"),
        md_link_abs_check_func=partial(has_record_by_path, bucket))
    if not isinstance(post_doc_links, (list, tuple)):
        post_doc_links = []
    text_words = to_int(record_data.get('text_words'), default_if_fail=0)
    # subtract any previously counted words to avoid double counting
    old_text_words = to_int(words_info.get(lower_path), default_if_fail=0)
    if old_text_words:
        bucket_text_words -= old_text_words
    if is_deleted or post_status in ['draft', 'private']:
        # deleted or draft-like posts don't count: strip their info
        words_info.pop(lower_path, None)
        # remove this path from every tag it used to carry
        old_tags = tags_info_paths.get(lower_path)
        if not isinstance(old_tags, (list, tuple)):
            old_tags = []
        old_tags = [smart_unicode(tag) for tag in old_tags]
        tags_info_paths.pop(lower_path, None)
        for tag in old_tags:
            tag_paths = tags_info_tags.setdefault(tag, [])
            if lower_path in tag_paths:
                tag_paths.remove(lower_path)
            if not tag_paths:  # tag no longer used anywhere
                tags_info_tags.pop(tag, None)
        # remove this path from every doc it linked to
        old_links = links_info_paths.get(lower_path)
        if not isinstance(old_links, (list, tuple)):
            old_links = []
        old_links = [smart_unicode(link) for link in old_links]
        links_info_paths.pop(lower_path, None)
        for link in old_links:
            back_paths = links_info_links.setdefault(link, [])
            if lower_path in back_paths:
                back_paths.remove(lower_path)
            if not back_paths:  # no remaining back references
                links_info_links.pop(link, None)
    else:
        bucket_text_words += text_words
        words_info[lower_path] = text_words
        # register the post's current tags
        if post_tags:
            tags_info_paths[lower_path] = post_tags
        for tag in post_tags:
            tag_paths = tags_info_tags.setdefault(tag, [])
            if lower_path not in tag_paths:
                tag_paths.append(lower_path)
        # un-register tags the post no longer carries; collect empties
        empty_tags = []
        for tag, paths_tagged in tags_info_tags.items():
            if not paths_tagged:
                empty_tags.append(tag)
                continue
            if not isinstance(paths_tagged, list):
                continue
            if lower_path in paths_tagged and tag not in post_tags:
                paths_tagged.remove(lower_path)
                if not paths_tagged:
                    empty_tags.append(tag)
        for empty_tag in empty_tags:
            tags_info_tags.pop(empty_tag, None)
        # register the post's current outgoing links
        if post_doc_links:
            links_info_paths[lower_path] = post_doc_links
        for link in post_doc_links:
            back_paths = links_info_links.setdefault(link, [])
            if lower_path not in back_paths:
                back_paths.append(lower_path)
        # un-register stale links; collect empties
        empty_links = []
        for link, paths_linked in links_info_links.items():
            if not paths_linked:
                empty_links.append(link)
                continue
            if not isinstance(paths_linked, list):
                continue
            if lower_path in paths_linked and link not in post_doc_links:
                paths_linked.remove(lower_path)
                if not paths_linked:
                    empty_links.append(link)
        for empty_link in empty_links:
            links_info_links.pop(empty_link, None)
    if bucket_text_words < 0:
        bucket_text_words = 0
    posts_info['text_words'] = bucket_text_words
    set_bucket_configs(bucket, configs=posts_info, config_type='posts')