Ejemplo n.º 1
0
def get_relative_path(filepath, root=None):
    # Return filepath relative to root when filepath lives under root;
    # otherwise fall back to the bare filename.
    filepath = same_slash(filepath)
    root = same_slash(root)
    prefix = (root + '/') if root else None
    if prefix and filepath.startswith(prefix):
        return filepath.replace(prefix, '', 1)
    return os.path.split(filepath)[-1]
Ejemplo n.º 2
0
def sync_loop_local_filesystem(root_path,
                               app_name,
                               check_md5=True,
                               extra_should_sync_func=None):
    # Walk root_path and collect every file and folder path for which
    # should_sync() reports a pending change.  Returns [] when root_path
    # is not a directory.
    root_path = same_slash(root_path)
    if not os.path.isdir(root_path):  # root missing — nothing to sync
        return []
    matched_paths = []
    for parent, folders, files in os.walk(root_path):
        # skip hidden parents and symlinked (non-real) parents
        if is_a_hidden_path(parent) or not is_real(parent):
            continue
        # files first, then folders — same order as the original loops
        for name in files + folders:
            path = join(parent, name)
            # consult the local sync db: keep only entries that changed
            if should_sync(path,
                           root_path,
                           app_name,
                           check_md5,
                           extra_should_sync_func=extra_should_sync_func):
                matched_paths.append(path)
    return matched_paths
Ejemplo n.º 3
0
def make_sure_archive_folder(filepath):
    # Ensure the '.Archive' folder (used for version management) exists
    # next to the given file; create it when missing and return its path.
    parent = same_slash(os.path.dirname(filepath))
    archive_path = join(parent, '.Archive')
    if not os.path.isdir(archive_path):
        os.makedirs(archive_path)
    return archive_path
Ejemplo n.º 4
0
def update_sync_data(filepath, root, new_data, app_name):
    # Merge extra fields into the stored sync-data record for filepath
    # and write the merged record back as JSON.
    filepath = same_slash(filepath)
    merged = get_sync_data(filepath, root, app_name)
    merged.update(new_data)
    target_path = get_sync_data_filepath(filepath, root, app_name)
    with open(target_path, 'w') as f:
        f.write(json.dumps(merged))
Ejemplo n.º 5
0
def after_sync_deleted(filepath, root, app_name):
    """Remove the local sync-data record for a path that was deleted.

    Best-effort: a failed removal is ignored.  The original used a bare
    ``except`` which also swallowed KeyboardInterrupt/SystemExit; only
    OS-level errors (race with another deleter, permissions) are caught now.
    """
    filepath = same_slash(filepath)
    data_path = get_sync_data_filepath(filepath, root, app_name)
    if os.path.isfile(data_path):
        try:
            os.remove(data_path)
        except OSError:  # file vanished or not removable — ignore
            pass
Ejemplo n.º 6
0
def get_file_versions_folder(filepath):
    # Map a source file to its versions folder: <dir>/.Archive/<filename>.
    # Returns None when the source file itself does not exist.
    filepath = same_slash(filepath)
    if not os.path.isfile(filepath):
        return  # source file missing — nothing to version
    archive_root = make_sure_archive_folder(filepath)  # ensure .Archive exists
    name = os.path.split(filepath)[-1]
    return join(archive_root, name)
Ejemplo n.º 7
0
def sync_find_files_to_delete(root_path, app_name, as_dict=False):
    """Return paths that were synced before but no longer exist locally.

    Compares the sync-data records stored for this app against the current
    filesystem walk; any recorded path that is now missing is returned.
    Directories are placed after files so callers can delete children first.

    When ``as_dict`` is true, each entry is a dict with ``path``/``filepath``
    keys and an ``is_dir`` flag.
    """
    sync_data_folder = get_sync_data_folder(root_path, app_name)
    if not os.path.isdir(sync_data_folder):  # never synced before
        return []
    current_files = sync_loop_local_filesystem(root_path,
                                               app_name=app_name,
                                               check_md5=False)  # same_path already
    old_file_paths = []
    old_dir_paths = set()
    for data_filename in os.listdir(sync_data_folder):
        data_filepath = join(sync_data_folder, data_filename)
        try:
            with open(data_filepath) as f:
                data = json.loads(f.read())
            filepath = data.get('filepath')
            is_dir = data.get('is_dir', False)
            if data.get('is_relative'):
                # stored relative — expand against the sync root
                filepath = join(root_path, filepath)
            if filepath:
                filepath = same_slash(filepath)
                old_file_paths.append(filepath)
                if is_dir:
                    old_dir_paths.add(filepath)
        except Exception:  # corrupt/unreadable record — skip it (best-effort)
            pass

    candidates = list(set(old_file_paths) - set(current_files))

    # files first, folders last, so directories are emptied before removal
    filepaths_to_delete = []
    dirs_to_delete = []
    for path in candidates:
        # todo try to detect whether the path lives on iCloud
        # BUGFIX: the original tested the stale loop variable ``filepath``
        # (left over from the record-reading loop above) instead of ``path``,
        # so the file/dir split was effectively arbitrary.
        if path in old_dir_paths:
            dirs_to_delete.append(path)
        else:
            filepaths_to_delete.append(path)
    filepaths_to_delete += dirs_to_delete

    if not as_dict:
        return filepaths_to_delete
    return [
        dict(path=p, filepath=p, is_dir=(p in old_dir_paths))
        for p in filepaths_to_delete
    ]
Ejemplo n.º 8
0
def get_sync_data(filepath, root, app_name):
    """Load the stored sync record for a filepath.

    The record (JSON) holds what is needed to decide whether the file must
    be synced again — md5, synced_at, etc.  Relative stored paths are
    expanded against ``root``.  Returns {} when no record exists, the file
    cannot be parsed, or the parsed value is not a dict.
    """
    filepath = same_slash(filepath)
    data_path = get_sync_data_filepath(filepath, root, app_name)
    if os.path.isfile(data_path):
        try:
            with open(data_path) as f:
                data = json.loads(f.read())
            if data.get('is_relative'):
                # stored relative — convert back to an absolute path
                data['filepath'] = join(root, data['filepath'])
            if isinstance(data, dict):
                return data
        except Exception:
            # unreadable/invalid record (IO error, bad JSON, non-dict
            # payload hitting .get) — treat as "no record".  The original
            # bare ``except`` also caught KeyboardInterrupt/SystemExit.
            pass
    return {}  # final fallback
Ejemplo n.º 9
0
    def __init__(
        self,
        relative_path,
        abs_filepath=None,
        private_key=None,
        should_encrypt_file=False,
        is_deleted=False,
        is_dir=None,
        ipfs_key=None,
        doc_type=None,
        version=None,
        raw_content=None,
        files_info=None,
        real_relative_path=None,
        utc_offset=None,
    ):
        # Wraps one file (or a blob of raw content) for syncing.
        # relative_path: path relative to the sync root; leading '/' stripped.
        # raw_content: when given it takes precedence — abs_filepath is dropped.
        if raw_content:  # content passed in directly, abs_filepath becomes invalid
            abs_filepath = None

        # external data object that stores info about all the files
        self.files_info_is_updated = False
        self.files_info = files_info

        self.real_relative_path = real_relative_path
        self.relative_path = same_slash(relative_path).lstrip('/')
        self.path = self.relative_path
        self.abs_filepath = abs_filepath
        self.is_deleted = is_deleted
        self._is_dir = is_dir
        self._ipfs_key = ipfs_key
        self._doc_type = doc_type
        self.private_key = private_key  # besides submitting data to the server, also the key used to encrypt files
        self.should_encrypt_file = should_encrypt_file
        self.should_ignore_current_file = False

        self._raw_content = raw_content
        # byte form of the raw content ('' when raw_content is falsy)
        self._raw_byte_content = smart_str(raw_content or '')

        self.version = version

        self.utc_offset = utc_offset
0
def after_synced(filepath, root, app_name, **extra_data):
    # Record a successful sync: write a JSON record (relative path,
    # timestamp, md5, dir flag) for filepath, then make sure every parent
    # folder up the tree has a record too (recursively).
    filepath = same_slash(filepath)
    if not os.path.exists(filepath):
        return  # ignore
    if not is_sub_path(filepath, root):
        return  # ignore
    record = dict(
        filepath=get_relative_path(filepath, root),
        synced_at=datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        md5=md5_for_file(filepath),
        is_dir=os.path.isdir(filepath),
        is_relative=True,
    )
    record.update(extra_data)
    data_path = get_sync_data_filepath(filepath, root, app_name)
    with open(data_path, 'w') as f:
        f.write(json.dumps(record))

    # store the parent folders into the local sync db
    parent_folder = os.path.dirname(filepath)
    parent_data_path = get_sync_data_filepath(parent_folder, root, app_name)
    if not os.path.isfile(parent_data_path):
        after_synced(parent_folder, root, app_name)
Ejemplo n.º 11
0
def create_file_version(filepath,
                        force=False,
                        min_time_diff=60,
                        history_max_versions=150):
    """Snapshot a Markdown file into its .Archive versions folder.

    force: create the version even when the size/time heuristics would skip
        it (identical content is still never versioned).
    min_time_diff: minimum seconds since the last version before a new one
        is taken (unless force).
    history_max_versions: cap on stored versions for small (<10k) files;
        larger files get progressively smaller fixed caps.
    """
    filepath = same_slash(filepath)

    # versioning applies to Markdown files only
    if not _is_a_markdown_file(filepath):
        return

    if not os.path.exists(filepath):
        return  # ignore

    if os.path.isdir(filepath):
        return

    with open(filepath, 'rb') as f:
        raw_content = f.read()
        if not raw_content:
            return  # blank content, ignore

    raw_content = smart_str(raw_content)

    now_str = datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')

    version_folder = get_file_versions_folder(filepath)
    if not version_folder:
        return  # ignore

    version_file_path = join(version_folder,
                             now_str + os.path.splitext(filepath)[1])

    if not os.path.isdir(version_folder):
        os.makedirs(version_folder)

    # NOTE: raw string — the original '\d{4}...' pattern relied on invalid
    # escape sequences (a SyntaxWarning on modern Python).
    versions_file_names = [
        name for name in os.listdir(version_folder)
        if re.search(r'\d{4}-\d{1,2}-\d{1,2}', name)
    ]
    versions_file_names.sort(reverse=True)  # newest first

    file_size = os.path.getsize(filepath)
    # renamed from ``now`` — the original rebound the datetime above to a float
    now_ts = time.time()

    if versions_file_names and file_size < 30 * 1024:
        # extra checks for articles under 30k
        last_version = versions_file_names[0]
        last_path = join(version_folder, last_version)
        last_mtime = os.path.getmtime(last_path)
        with open(last_path) as f:
            last_content = f.read()
        if last_content == raw_content:  # content unchanged — never version
            return

        length_diff = abs(len(last_content) - len(raw_content))
        # BUGFIX: the original wrote
        #     length_diff < 30 or 0 < dt < min_time_diff and not force
        # which parses as ``a or (b and not force)`` — so force=True never
        # bypassed the length heuristic.  ``force`` now overrides both
        # heuristics, matching its documented intent.
        if (length_diff < 30
                or 0 < (now_ts - last_mtime) < min_time_diff) and not force:
            # change under 30 chars, or last version within the window: skip
            return  # ignore
    elif versions_file_names:
        # only create a version when the last one is old enough
        last_version = versions_file_names[0]
        last_path = join(version_folder, last_version)
        last_mtime = os.path.getmtime(last_path)
        if 0 < (now_ts - last_mtime) < min_time_diff and not force:
            return  # ignore

    if file_size < 10 * 1024:  # under 10k
        max_versions = max(history_max_versions, 0)
    elif file_size < 100 * 1024:  # under 100k
        max_versions = 80
    elif file_size < 500 * 1024:
        max_versions = 50
    else:
        max_versions = 20

    if not max_versions:  # ignore, versions are not allowed
        return

    # prune versions beyond the cap (oldest entries are at the tail)
    for version_name_to_delete in versions_file_names[max_versions:]:
        file_path_to_delete = join(version_folder, version_name_to_delete)
        try:
            # OSError also covers Python 2's distinct os.remove failure type
            # that the original ``except IOError`` would have missed.
            os.remove(file_path_to_delete)
        except OSError:
            pass

    try:
        with open(version_file_path, 'wb') as new_f:
            new_f.write(raw_content)
    except OSError:  # write failed — give up silently
        return