def get_relative_path(filepath, root=None):
    """Return `filepath` relative to `root`; fall back to the basename."""
    filepath = same_slash(filepath)
    root = same_slash(root)
    if root and filepath.startswith(root + '/'):
        # strip exactly one leading "<root>/" prefix
        return filepath.replace(root + '/', '', 1)
    # not under root (or no root given): just the final path component
    return os.path.split(filepath)[-1]
def sync_loop_local_filesystem(root_path, app_name, check_md5=True, extra_should_sync_func=None):
    """Walk `root_path` and collect file/folder paths that need syncing."""
    root_path = same_slash(root_path)
    if not os.path.isdir(root_path):
        # root directory does not exist -> nothing to do
        return []
    paths_to_sync = []
    for parent, folders, files in os.walk(root_path):
        # skip hidden directories and link-type (non-real) directories
        if is_a_hidden_path(parent) or not is_real(parent):
            continue
        for name in files + folders:
            candidate = join(parent, name)
            # consult the local sync db to decide whether this path changed
            if should_sync(candidate, root_path, app_name, check_md5,
                           extra_should_sync_func=extra_should_sync_func):
                paths_to_sync.append(candidate)
    return paths_to_sync
def make_sure_archive_folder(filepath):
    """Ensure the sibling `.Archive` folder (used for version history) exists.

    Returns the `.Archive` folder path next to `filepath`.
    """
    parent_dir = same_slash(os.path.dirname(filepath))
    archive_path = join(parent_dir, '.Archive')
    if not os.path.isdir(archive_path):
        os.makedirs(archive_path)
    return archive_path
def update_sync_data(filepath, root, new_data, app_name):
    """Merge `new_data` into the stored sync record for `filepath` and save it.

    Used when extra custom fields must be persisted alongside the normal
    sync metadata.
    """
    filepath = same_slash(filepath)
    data_path = get_sync_data_filepath(filepath, root, app_name)
    data = get_sync_data(filepath, root, app_name)
    data.update(new_data)
    # bug fix: get_sync_data() rewrites a relative `filepath` field into an
    # absolute one; convert it back before persisting, otherwise a record
    # flagged `is_relative` ends up holding an absolute path on disk.
    if data.get('is_relative') and data.get('filepath'):
        stored = same_slash(data['filepath'])
        root_prefix = same_slash(root) + '/'
        if stored.startswith(root_prefix):
            data['filepath'] = stored[len(root_prefix):]
    with open(data_path, 'w') as f:
        f.write(json.dumps(data))
def after_sync_deleted(filepath, root, app_name):
    """Remove the local sync record for `filepath` after the file was deleted."""
    filepath = same_slash(filepath)
    data_path = get_sync_data_filepath(filepath, root, app_name)
    if os.path.isfile(data_path):
        try:
            os.remove(data_path)
        except OSError:
            # best-effort cleanup: the record may be locked or already gone;
            # narrowed from a bare `except:` that also swallowed
            # KeyboardInterrupt/SystemExit
            pass
def get_file_versions_folder(filepath):
    """Return the per-file folder under `.Archive` that stores its versions.

    Returns None when the source file itself does not exist.
    """
    filepath = same_slash(filepath)
    if not os.path.isfile(filepath):
        return  # source file is gone -> ignore
    archive_path = make_sure_archive_folder(filepath)  # ensures `.Archive` exists
    filename = os.path.split(filepath)[-1]
    return join(archive_path, filename)
def sync_find_files_to_delete(root_path, app_name, as_dict=False):
    """Return paths recorded in the sync db that no longer exist locally.

    Directories are ordered after files so contents are removed before their
    parent folder. With `as_dict=True`, each entry is a dict carrying the
    path plus an `is_dir` flag.
    """
    sync_data_folder = get_sync_data_folder(root_path, app_name)
    if not os.path.isdir(sync_data_folder):  # never synced before
        return []
    # paths currently on disk (already normalized by same_slash)
    files = sync_loop_local_filesystem(root_path, app_name=app_name, check_md5=False)
    old_file_paths = []
    old_dir_paths = set()
    for data_filename in os.listdir(sync_data_folder):
        data_filepath = join(sync_data_folder, data_filename)
        try:
            with open(data_filepath) as f:
                data = json.loads(f.read())
        except (IOError, OSError, ValueError):
            # unreadable or corrupt record: skip it
            continue
        if not isinstance(data, dict):
            continue
        filepath = data.get('filepath')
        if data.get('is_relative') and filepath:
            filepath = join(root_path, filepath)
        if filepath:
            filepath = same_slash(filepath)
            old_file_paths.append(filepath)
            if data.get('is_dir', False):
                old_dir_paths.add(filepath)
    _filepaths_to_delete = list(set(old_file_paths) - set(files))
    # files first, folders last, so a folder is deleted after its contents
    filepaths_to_delete = []
    dirs_to_delete = []
    for path in _filepaths_to_delete:
        # TODO: try to detect whether the path lives on iCloud
        # bug fix: the membership test must use `path` — the original tested
        # `filepath`, a leftover loop variable from the record-reading loop
        is_dir = path in old_dir_paths
        if not is_dir:
            filepaths_to_delete.append(path)
        else:
            dirs_to_delete.append(path)
    filepaths_to_delete += dirs_to_delete
    if as_dict:
        filepaths_to_delete_as_dict = []
        for filepath in filepaths_to_delete:
            is_dir = filepath in old_dir_paths
            filepaths_to_delete_as_dict.append(
                dict(path=filepath, filepath=filepath, is_dir=is_dir))
        return filepaths_to_delete_as_dict
    else:
        return filepaths_to_delete
def get_sync_data(filepath, root, app_name):
    """Load the stored sync record (md5, synced_at, …) for `filepath`.

    The record decides whether the current file needs syncing again.
    Returns an empty dict when no valid record exists.
    """
    filepath = same_slash(filepath)
    data_path = get_sync_data_filepath(filepath, root, app_name)
    if os.path.isfile(data_path):
        try:
            with open(data_path) as f:
                data = json.loads(f.read())
            if data.get('is_relative'):
                # stored relative path -> absolute path under `root`
                data['filepath'] = join(root, data['filepath'])
            if isinstance(data, dict):
                return data
        except (IOError, OSError, ValueError, KeyError, AttributeError, TypeError):
            # narrowed from a bare `except:`; any corrupt/odd record
            # still falls through to the empty-dict default
            pass
    return {}  # final fallback
def __init__(
        self,
        relative_path,
        abs_filepath=None,
        private_key=None,
        should_encrypt_file=False,
        is_deleted=False,
        is_dir=None,
        ipfs_key=None,
        doc_type=None,
        version=None,
        raw_content=None,
        files_info=None,
        real_relative_path=None,
        utc_offset=None,
):
    """Build one sync record for a file path or for raw in-memory content."""
    if raw_content:
        # content was passed in directly, so the on-disk path is ignored
        abs_filepath = None
    # the normalized relative path is the canonical identity of this record
    normalized = same_slash(relative_path).lstrip('/')
    # external data object describing all files
    self.files_info_is_updated = False
    self.files_info = files_info
    self.real_relative_path = real_relative_path
    self.relative_path = normalized
    self.path = normalized
    self.abs_filepath = abs_filepath
    self.is_deleted = is_deleted
    self._is_dir = is_dir
    self._ipfs_key = ipfs_key
    self._doc_type = doc_type
    # key used both for submitting to the server and for file encryption
    self.private_key = private_key
    self.should_encrypt_file = should_encrypt_file
    self.should_ignore_current_file = False
    self._raw_content = raw_content
    self._raw_byte_content = smart_str(raw_content or '')
    self.version = version
    self.utc_offset = utc_offset
def after_synced(filepath, root, app_name, **extra_data):
    """Record a successful sync of `filepath` in the local sync db.

    Also recursively makes sure every ancestor folder up to `root` has a
    record of its own.
    """
    filepath = same_slash(filepath)
    if not os.path.exists(filepath):
        return  # ignore
    if not is_sub_path(filepath, root):
        return  # ignore
    data_path = get_sync_data_filepath(filepath, root, app_name)
    record = dict(
        filepath=get_relative_path(filepath, root),
        synced_at=datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        md5=md5_for_file(filepath),
        is_dir=os.path.isdir(filepath),
        is_relative=True,
    )
    record.update(extra_data)
    with open(data_path, 'w') as f:
        f.write(json.dumps(record))
    # store the parent folders into the local sync db as well
    parent_folder = os.path.dirname(filepath)
    parent_data_path = get_sync_data_filepath(parent_folder, root, app_name)
    if not os.path.isfile(parent_data_path):
        after_synced(parent_folder, root, app_name)
def create_file_version(filepath, force=False, min_time_diff=60, history_max_versions=150):
    """Copy a markdown file into its `.Archive/<name>/` folder as a version.

    `force=True` bypasses the time/size throttling (a version is still
    skipped when the content is unchanged). `min_time_diff` is the minimum
    number of seconds between two versions; `history_max_versions` caps how
    many versions are kept for small (<10k) files.
    """
    filepath = same_slash(filepath)
    if not _is_a_markdown_file(filepath):  # markdown files only
        return
    if not os.path.exists(filepath):
        return  # ignore
    if os.path.isdir(filepath):
        return
    with open(filepath, 'rb') as f:
        raw_content = f.read()
    if not raw_content:
        return  # blank content, ignore
    raw_content = smart_str(raw_content)

    now_str = datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
    version_folder = get_file_versions_folder(filepath)
    if not version_folder:
        return  # source file missing -> ignore
    version_file_path = join(version_folder, now_str + os.path.splitext(filepath)[1])
    if not os.path.isdir(version_folder):
        os.makedirs(version_folder)

    # existing versions, newest first (names start with a date)
    versions_file_names = [
        name for name in os.listdir(version_folder)
        if re.search(r'\d{4}-\d{1,2}-\d{1,2}', name)  # bug fix: raw string for the regex
    ]
    versions_file_names.sort(reverse=True)

    file_size = os.path.getsize(filepath)
    now = time.time()
    if versions_file_names and file_size < 30 * 1024:
        # extra checks for small (<30k) documents
        last_path = join(version_folder, versions_file_names[0])
        last_mtime = os.path.getmtime(last_path)
        with open(last_path) as f:
            last_content = f.read()
        if last_content == raw_content:
            return  # content unchanged, ignore
        length_diff = abs(len(last_content) - len(raw_content))
        # bug fix: `force` must override both throttles; the original
        # `a or b and not force` bound `and` tighter than `or`, so `force`
        # never bypassed the length-diff check
        if not force and (length_diff < 30 or 0 < (now - last_mtime) < min_time_diff):
            # change is tiny (<30 chars) or the last version is too recent
            return  # ignore
    elif versions_file_names:
        # larger files: only throttle by time since the last version
        last_path = join(version_folder, versions_file_names[0])
        last_mtime = os.path.getmtime(last_path)
        if not force and 0 < (now - last_mtime) < min_time_diff:
            return  # ignore

    # how many versions to keep, depending on file size
    if file_size < 10 * 1024:  # < 10k
        max_versions = max(history_max_versions, 0)
    elif file_size < 100 * 1024:  # < 100k
        max_versions = 80
    elif file_size < 500 * 1024:  # < 500k
        max_versions = 50
    else:
        max_versions = 20
    if not max_versions:
        return  # versioning is not allowed for this file

    # prune versions beyond the cap (the oldest, since the list is newest-first)
    for version_name_to_delete in versions_file_names[max_versions:]:
        file_path_to_delete = join(version_folder, version_name_to_delete)
        try:
            os.remove(file_path_to_delete)
        except OSError:  # bug fix: os.remove raises OSError, not just IOError
            pass
    try:
        with open(version_file_path, 'wb') as new_f:
            new_f.write(raw_content)
    except IOError:
        return  # failed to write the version, give up silently