Ejemplo n.º 1
0
def get_relative_path(filepath, root=None):
    """Return *filepath* expressed relative to *root*.

    If *root* is falsy or *filepath* does not live under it, fall back to
    the file's basename.
    """
    filepath = same_slash(filepath)
    root = same_slash(root)
    prefix = (root + '/') if root else None
    if prefix and filepath.startswith(prefix):
        # strip exactly one leading occurrence of the root prefix
        return filepath.replace(prefix, '', 1)
    return os.path.split(filepath)[-1]
Ejemplo n.º 2
0
def update_sync_data(filepath, root, new_data):
    """Merge *new_data* into the stored sync record for *filepath* and persist it."""
    # Occasionally extra, special-purpose fields need to be stored in the record.
    filepath = same_slash(filepath)
    merged = get_sync_data(filepath, root)
    merged.update(new_data)
    record_path = get_sync_data_filepath(filepath, root)
    with open(record_path, 'w') as f:
        f.write(json.dumps(merged))
Ejemplo n.º 3
0
def after_sync_deleted(filepath, root):
    """Best-effort removal of the local sync record after a file was deleted.

    Silently ignores filesystem errors (record already gone, permissions),
    but no longer swallows unrelated exceptions like KeyboardInterrupt.
    """
    filepath = same_slash(filepath)
    data_path = get_sync_data_filepath(filepath, root)
    if os.path.isfile(data_path):
        try:
            os.remove(data_path)
        except OSError:
            # the record may have been removed concurrently or be unwritable;
            # deletion of the record is best-effort by design
            pass
Ejemplo n.º 4
0
def get_sync_data(filepath, root):
    """Return the stored sync record for *filepath*, or ``{}`` if unavailable.

    The record is a small JSON file (md5, synced_at, ...) used to decide
    whether the file needs to be synced again.  If the record was stored
    with a relative path, it is converted back to an absolute path here.
    """
    filepath = same_slash(filepath)
    data_path = get_sync_data_filepath(filepath, root)
    if os.path.isfile(data_path):
        try:
            with open(data_path) as f:
                data = json.loads(f.read())
                if data.get('is_relative'):
                    # stored path is relative to root; make it absolute again
                    data['filepath'] = join(root, data['filepath'])
            if isinstance(data, dict):
                return data
        except (OSError, ValueError, AttributeError, TypeError):
            # OSError: unreadable file; ValueError: corrupt JSON;
            # AttributeError/TypeError: JSON that is not the expected dict
            # shape.  A broken record is treated as "no record".
            pass
    return {}  # final
Ejemplo n.º 5
0
def loop_local_filesystem(root_path, check_md5=True):
    """Walk *root_path* and collect the paths that still need syncing.

    Hidden directories and symlinked directories are skipped entirely;
    each remaining file and folder is filtered through ``should_sync``.
    """
    root_path = same_slash(root_path)
    if not os.path.isdir(root_path):
        # root does not exist locally: nothing to collect
        return []
    collected = []
    for parent, folders, files in os.walk(root_path):
        if is_a_hidden_path(parent) or not is_real(parent):
            # hidden dirs and link-type dirs are ignored
            continue
        # files first, then folders, matching the sync order
        for name in list(files) + list(folders):
            candidate = join(parent, name)
            # skip entries already recorded in the local sync db
            if should_sync(candidate, root_path, check_md5):
                collected.append(candidate)
    return collected
Ejemplo n.º 6
0
def find_files_to_delete(root_path):
    """Return recorded paths whose local files have disappeared.

    Compares the paths stored in the sync-data folder against what the
    filesystem walk currently finds; the difference is what was deleted
    locally and should be deleted remotely too.
    """
    sync_data_folder = get_sync_data_folder(root_path)
    if not os.path.isdir(sync_data_folder):  # never synced before
        return []
    files = loop_local_filesystem(root_path,
                                  check_md5=False)  # same_path already
    data_filenames = os.listdir(sync_data_folder)
    old_file_paths = []
    for data_filename in data_filenames:
        data_filepath = join(sync_data_folder, data_filename)
        try:
            with open(data_filepath) as f:
                data = json.loads(f.read())
                filepath = data.get('filepath')
                if data.get('is_relative'):
                    filepath = join(root_path, filepath)
                if filepath:
                    old_file_paths.append(same_slash(filepath))
        except (OSError, ValueError, AttributeError, TypeError):
            # unreadable or corrupt record (bad JSON, wrong shape,
            # missing path) — skip it rather than abort the scan
            pass
    return list(set(old_file_paths) - set(files))
Ejemplo n.º 7
0
def after_synced(filepath, root, **extra_data):
    """Write a sync record for *filepath* after a successful sync.

    The record stores the relative path, sync timestamp, md5 and
    is_dir flag; *extra_data* fields override/extend these.  Parent
    folders without a record get one recursively.
    """
    filepath = same_slash(filepath)
    if not os.path.exists(filepath):
        return  # ignore
    if not is_sub_path(filepath, root):
        return  # ignore
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    record = {
        'filepath': get_relative_path(filepath, root),
        'synced_at': timestamp,
        'md5': md5_for_file(filepath),
        'is_dir': os.path.isdir(filepath),
        'is_relative': True,
    }
    record.update(extra_data)
    record_path = get_sync_data_filepath(filepath, root)
    with open(record_path, 'w') as f:
        f.write(json.dumps(record))

    # make sure every ancestor folder is present in the local sync db
    parent_folder = os.path.dirname(filepath)
    if not os.path.isfile(get_sync_data_filepath(parent_folder, root)):
        after_synced(parent_folder, root)
Ejemplo n.º 8
0
def sync_from_server(root, token):
    """Pull changed files from the server into *root*.

    Fetches the change list since the last stored cursor, applies each
    meta via ``handle_meta``, then persists the new cursor only after
    every meta has been handled.
    """
    logs = []
    root = same_slash(root)

    sync_configs = get_sync_configs(root, token)
    if not sync_configs:
        write_logs(
            "can't get the correct sync configs from server, check your network connection and your TOKEN?"
        )
        return

    sync_list_url = sync_configs['sync_list_url']
    sync_content_url = sync_configs['sync_content_url']

    old_cursor = get_sync_cursor(root)
    write_logs('start sync from server', root=root)
    new_cursor, metas = get_metas(token,
                                  cursor=old_cursor,
                                  sync_list_url=sync_list_url)
    if not metas:
        write_logs('no need to sync from server, it is up-to-date.', root=root)
        return
    write_logs('will try to sync %s files from server.' % len(metas),
               root=root)

    for meta in metas:
        handler_log = handle_meta(token, meta, root, sync_content_url)
        if handler_log:
            write_logs(handler_log, root=root)
    if new_cursor:  # save the cursor only after all metas were processed
        store_sync_cursor(root, new_cursor)
    # NOTE: the former `new_cursor is None and not metas` failure branch was
    # unreachable here — the early return above guarantees metas is truthy.
    logs.append('sync ended')
    write_logs(logs, root=root)
Ejemplo n.º 9
0
def handle_meta(token, meta, root_path, sync_content_url):
    """Apply one server-side change description (*meta*) to the local tree.

    Depending on the meta flags this deletes a local path, creates a
    directory, or downloads file content from ``sync_content_url``.
    Returns a human-readable log line, or None when nothing was done.
    """
    # the prefix is really a scope — usually a site folder
    root_path = same_slash(root_path).rstrip('/')
    relative_path = same_slash(meta['path']).lstrip(
        '/')  # case-sensitive, relative to the root directory
    site_name = relative_path.strip('/').split('/')[0]

    full_path = join(root_path, relative_path)  # path on the local machine
    version = meta.get('version', '')
    is_deleted = meta.get('is_deleted', False)
    is_dir = meta.get('is_dir', False)

    if is_a_hidden_path(relative_path):  # hidden files are not handled
        return

    if relative_path.startswith(
            '_cache/') or relative_path == '_cache':  # cache files are not handled
        return

    if os.path.exists(join(root_path, site_name, '.sync_ignore')):
        return  # ignore

    if is_deleted:
        if os.path.exists(full_path):
            delete_file(full_path)
            # delete
            after_sync_deleted(full_path, root=root_path)
            return 'got %s from server, delete' % relative_path
    elif is_dir:
        if not os.path.isdir(full_path):
            try:
                os.makedirs(full_path)
                after_synced(full_path, root=root_path)
            except OSError:
                return 'failed to create dir %s' % full_path
            except:
                # NOTE(review): after_synced failures are also swallowed here;
                # only OSError produces a log line
                pass
    else:  # a concrete file
        file_id = to_unicode(meta['_id'])
        need_download = True
        if os.path.isfile(full_path):
            # local md5 matching the server version means no download needed
            old_version = md5_for_file(full_path)
            if old_version == version:
                need_download = False

        if need_download:
            # the actual file content must be downloaded
            timeout = 20 * 60
            if is_markdown(full_path):
                timeout = 2 * 60
            response = requests.post(sync_content_url,
                                     data=dict(token=token, id=file_id),
                                     timeout=timeout)
            if response.status_code >= 400:
                # ignore
                return 'fail to get %s, status code is %s' % (
                    full_path, response.status_code)
            content = response.content
            try:
                create_file(full_path, content)
            except OSError:
                return 'failed to create file then ignore %s' % full_path

            after_synced(full_path,
                         root=root_path)  # record the state so the next sync pass skips this file
            time.sleep(0.2)  # throttle to avoid being blocked by the server
            return 'got %s from server' % relative_path
        else:
            return '%s same file in both client side and server side' % relative_path
Ejemplo n.º 10
0
def sync_file_to_server(token, filepath, root, sync_should_url, sync_url):
    """Push one local path (file, folder, or deletion) to the server.

    First asks ``sync_should_url`` whether an upload is needed, then
    POSTs the file content (or the delete/folder metadata) to ``sync_url``.
    Returns a list of human-readable log lines.
    """
    # return logs (list)
    # convert the local path into the path the server side expects
    logs = []
    filepath = same_slash(filepath)
    root = same_slash(root)

    if is_a_hidden_path(filepath):
        return []  # ignore
    if not is_sub_path(filepath, root):
        return []

    relative_path = get_relative_path(filepath, root)

    data = {
        'source': 'cli',
        'platform': PLATFORM,
        'path': to_str(relative_path),
        'token': token,
    }

    if os.path.exists(filepath):
        reason = reason_for_not_sync(filepath)
        if reason:  # do not sync this path
            return [reason]
        is_deleted = False
        is_dir = os.path.isdir(filepath)
        is_file = not is_dir
        data['client_mtime'] = str(
            datetime.datetime.fromtimestamp(os.path.getmtime(filepath),
                                            tz=tzutc()))
    else:
        # path is gone locally: report a deletion; the stored record tells
        # us whether it used to be a directory
        is_deleted = True
        sync_data = get_sync_data(filepath, root=root)
        is_dir = sync_data.get('is_dir', False)
        is_file = False

    data['is_dir'] = str(is_dir).lower()  # as str for the form payload
    data['is_deleted'] = str(is_deleted).lower()  # as str for the form payload
    if not is_deleted:
        if is_dir:
            version = ''
        else:
            version = md5_for_file(filepath)
        data['version'] = version

    # start syncing to the server
    response = None
    if is_file:
        try:
            timeout = 20 * 60
            if is_markdown(filepath):
                timeout = 2 * 60
            try:
                # pre-flight check: ask the server whether the upload is needed
                checker_response = requests.post(sync_should_url,
                                                 data=data,
                                                 timeout=timeout)
                if checker_response.status_code == 401:
                    return ['bad token, break now']
                if checker_response.text == 'no':  # already exists, no sync needed
                    return ['exist on server side already']
                elif checker_response.text == 'no_site':
                    return ['no site matched, break now']  # will not sync...
            except:
                # pre-flight failures are ignored; fall through to the upload
                pass

            with open(filepath, 'rb') as f:
                response = requests.post(sync_url,
                                         data=data,
                                         files={'file': f},
                                         timeout=timeout)
            if response.status_code == 401:
                return ['bad token, break now']
            logs.append('uploaded to server done')
        except ConnectionError:
            logs.append('uploaded to server failed because of ConnectionError')
        except Timeout:
            logs.append('uploaded to server failed because of TimeoutError')
    else:  # a deletion, or a folder update
        try:
            response = requests.post(sync_url, data=data)
            if is_deleted:
                logs.append('deleted on server')
            else:
                logs.append('update folder to server')
        except ConnectionError:
            if is_deleted:
                logs.append(
                    'delete on server failed because of ConnectionError')
            else:
                logs.append(
                    'update folder on server failed because of ConnectionError'
                )

    if response:
        status_code = response.status_code
        if status_code == 200:
            # a 200 can still carry an application-level error in its JSON body
            if response.headers.get(
                    'content-type') == 'application/json' and response.json(
                    ).get('error_code'):
                message = response.json().get('message') or ''
                error_code = response.json().get('error_code')
                try:
                    error_code = int(error_code)
                    if error_code in [503, 401, 404]:
                        info = 'error_code:%s %s' % (error_code, message)
                        logs.append(info)
                except:
                    pass

        if status_code == 401:  # auth error; NOTE(review): the original comment
            # said "delete the stored token" but only a log line is emitted here
            logs.append('bad token')

    return logs