Example #1
    def begin(self) -> None:
        if self.have_active_commit(): raise Exception('A commit is already in progress')

        active_files = {}
        head = self.get_head()

        if head != 'root':
            commit = self.read_commit_index_object(head)
            active_files = self.flatten_dir_tree(
                self.read_dir_tree(commit['tree_root']))

        # 'active_commit_files' stores every file which will be in this revision,
        # including those carried over from the previous revision
        sfs.file_put_contents(
            sfs.cpjoin(self.base_path, 'active_commit_files'),
            bytes(json.dumps(active_files), encoding='utf8'))

        # 'active_commit_changes' stores a log of the files which have been added,
        # changed, or deleted in this revision
        sfs.file_put_contents(
            sfs.cpjoin(self.base_path, 'active_commit_changes'),
            bytes(json.dumps([]), encoding='utf8'))

        # Store that there is an active commit
        sfs.file_put_contents(sfs.cpjoin(self.base_path, 'active_commit'),
                              b'true')
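For context, a sketch of what the two state files might contain after a single file has been added; the path and hash are hypothetical but mirror the test fixtures further down (the hash is sha256 of b'test'):

# Hypothetical on-disk state after one fs_put_from_file() call
active_commit_files = {
    '/test/path': {
        'path': '/test/path',
        'hash': '9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08',
        'status': 'new',
    }
}
active_commit_changes = [active_commit_files['/test/path']]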
Example #2
    def fs_put_from_file(self, source_file: str, file_info) -> None:
        if not self.have_active_commit(): raise Exception('No commit in progress')
        file_info['hash'] = file_hash = sfs.hash_file(source_file)

        target_base = sfs.cpjoin(self.base_path, 'files', file_hash[:2])
        target = sfs.cpjoin(target_base, file_hash[2:])
        if not os.path.isfile(target):
            # Log objects which don't already exist so that, if the commit is rolled back,
            # we can identify garbage without reading every object referenced by existing commits
            self.gc_log_item('file', file_hash)

            # ---
            sfs.make_dirs_if_dont_exist(target_base)
            shutil.move(source_file, target)
        else:
            os.remove(source_file)

        #=======================================================
        # Update commit changes
        #=======================================================
        def helper(contents):
            # 'contents' is a list of file-info dicts, so membership is tested
            # against the recorded paths rather than the list itself
            paths = [item['path'] for item in contents]
            file_info['status'] = 'changed' if file_info['path'] in paths else 'new'
            return contents + [file_info]

        self.update_system_file('active_commit_changes', helper)

        #=======================================================
        # Update commit files
        #=======================================================
        def helper2(contents):
            contents[file_info['path']] = file_info
            return contents

        self.update_system_file('active_commit_files', helper2)
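The two-level layout above is standard content addressing: the first two hex characters of the hash pick a directory, the rest name the object, so identical content is stored exactly once. A minimal standalone sketch of the mapping (the base path is hypothetical):

import hashlib

def object_path(base_path: str, data: bytes) -> str:
    # The first two hex chars fan objects out across 256 directories;
    # the rest of the digest names the object itself
    object_hash = hashlib.sha256(data).hexdigest()
    return '/'.join([base_path, 'files', object_hash[:2], object_hash[2:]])

# object_path('/repo', b'test') ->
# '/repo/files/9f/86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08'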
Example #3
    def commit(self, commit_message, commit_by, commit_datetime=None) -> str:
        if not self.have_active_commit(): raise Exception('No commit in progress')

        current_changes = json.loads(
            sfs.file_get_contents(
                sfs.cpjoin(self.base_path, 'active_commit_changes')))
        active_files = json.loads(
            sfs.file_get_contents(
                sfs.cpjoin(self.base_path, 'active_commit_files')))

        if current_changes == []: raise Exception('Empty commit')

        # Create and store the file tree
        tree_root = self.write_dir_tree(self.build_dir_tree(active_files))

        # If no commit message is passed, store an indication of what was changed
        if commit_message == '':
            new_item = next((change for change in current_changes
                             if change['status'] in ['new', 'changed']), None)
            deleted_item = next((change for change in current_changes
                                 if change['status'] == 'deleted'), None)

            commit_message = "(Generated message)\n"
            if new_item is not None:
                commit_message += new_item['status'] + '    ' + new_item['path'] + '\n'
            if deleted_item is not None:
                commit_message += deleted_item['status'] + '    ' + deleted_item['path'] + '\n'
            if len(current_changes) > 2: commit_message += '...'

        # Commit timestamp
        commit_datetime = datetime.utcnow() if commit_datetime is None else commit_datetime
        commit_timestamp = commit_datetime.strftime("%d-%m-%Y %H:%M:%S:%f")

        # Create commit
        commit_object_hash = self.write_index_object(
            'commit', {
                'parent': self.get_head(),
                'utc_date_time': commit_timestamp,
                'commit_by': commit_by,
                'commit_message': commit_message,
                'tree_root': tree_root,
                'changes': current_changes
            })

        # Update the head; write plus atomic move so a reader never sees a partial hash
        sfs.file_put_contents(sfs.cpjoin(self.base_path, 'new_head'),
                              bytes(commit_object_hash, encoding='utf8'))
        os.rename(sfs.cpjoin(self.base_path, 'new_head'),
                  sfs.cpjoin(self.base_path, 'head'))

        # And clean up the working state
        os.remove(sfs.cpjoin(self.base_path, 'active_commit_changes'))
        os.remove(sfs.cpjoin(self.base_path, 'active_commit_files'))
        sfs.ignore(os.remove, sfs.cpjoin(self.base_path, 'gc_log'))
        os.remove(sfs.cpjoin(self.base_path, 'active_commit'))

        return commit_object_hash
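The head update above relies on the write-plus-rename idiom; a generic standalone sketch of that pattern (the fsync is an extra durability step the code above omits, and the temporary suffix is hypothetical):

import os

def atomic_write(path: str, data: bytes) -> None:
    # Readers only ever see the old file or the complete new one: rename()
    # replaces the target in a single step on POSIX filesystems, provided
    # the temporary file lives on the same volume
    tmp_path = path + '.tmp'
    with open(tmp_path, 'wb') as f:
        f.write(data)
        f.flush()
        os.fsync(f.fileno())  # assumption: flush alone does not guarantee durability
    os.rename(tmp_path, path)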
    def test_storage_put_rollback(self):
        """ Test that file put rolls back correctly """

        s = storage(DATA_DIR, CONF_DIR)
        s.begin()
        s.file_put_contents('hello', b'test content')
        s.rollback()

        self.assertFalse(os.path.isfile(cpjoin(DATA_DIR, 'hello')),
                         msg='File "hello" still exists, put rollback failed')

        self.assertTrue(
            os.path.isfile(cpjoin(DATA_DIR, CONF_DIR, BACKUP_DIR, '1_hello')),
            msg='Backup file "1_hello" does not exist, put rollback failed')
Example #5
    def have_active_commit(self) -> bool:
        """ Check whether an active commit is currently in progress """

        commit_state = sfs.file_or_default(
            sfs.cpjoin(self.base_path, 'active_commit'), None)
        return commit_state is not None
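Because the marker is a file rather than in-memory state, it doubles as crash detection: if 'active_commit' still exists when a process starts, the previous run died between begin() and commit(), and rollback() should be run before accepting new work. A minimal sketch of that startup check (a hypothetical helper, not part of the class above):

import os

def needs_recovery(base_path: str) -> bool:
    # A surviving marker means a previous process exited mid-commit,
    # so the partial state must be rolled back first
    return os.path.isfile(os.path.join(base_path, 'active_commit'))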
    def with_exclusive_lock():
        if not varify_user_lock(repository_path, session_token):
            return fail(lock_fail_msg)

        #===
        data_store = versioned_storage(repository_path)
        if not data_store.have_active_commit():
            return fail(no_active_commit_msg)

        # There is no valid reason for path traversal characters to be in a file path within this system
        file_path = request.headers['path']
        if any(item in ('..', '.')
               for item in re.split(r'\\|/', file_path)):
            return fail()

        #===
        tmp_path = cpjoin(repository_path, 'tmp_file')
        with open(tmp_path, 'wb') as f:
            while True:
                chunk = request.body.read(1000 * 1000)
                if not chunk: break  # a file-like body returns b'' at end of stream, not None
                f.write(chunk)

        #===
        data_store.fs_put_from_file(tmp_path, {'path': file_path})

        # Update the user lock expiry
        update_user_lock(repository_path, session_token)
        return success()
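The traversal check above splits on both separator styles and rejects any '.' or '..' component; a standalone version with a few illustrative cases:

import re

def has_traversal(file_path: str) -> bool:
    # Reject any path whose components include '.' or '..',
    # whichever separator the client used
    return any(part in ('..', '.') for part in re.split(r'\\|/', file_path))

assert has_traversal('/a/../b')
assert has_traversal('docs\\..\\secret')
assert not has_traversal('/plain/path.txt')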
Example #7
    def rollback(self) -> None:
        if not self.have_active_commit(): raise Exception('No commit in progress')

        gc_log_contents: str = sfs.file_or_default(
            sfs.cpjoin(self.base_path, 'gc_log'), b'').decode('utf8')

        gc_log_items = [
            file_row.split(' ') for file_row in gc_log_contents.splitlines()
        ]

        if gc_log_items != []:
            # If a commit object exists and its hash matches the current head we do not need to
            # do anything: the commit succeeded but we failed to delete the active commit file
            is_commit = next(
                (item for item in gc_log_items if item[0] == 'commit'), None)
            if is_commit is not None and is_commit[1] == self.get_head():
                pass  # commit actually ok

            else:  # commit not ok
                for item in gc_log_items:
                    # delete the object for this file, noting that it may not exist
                    object_dir = 'files' if item[0] == 'file' else 'index'
                    target_base = sfs.cpjoin(self.base_path, object_dir,
                                             item[1][:2])
                    sfs.ignore(os.remove, sfs.cpjoin(target_base, item[1][2:]))
                    sfs.ignore(os.rmdir, target_base)

        sfs.ignore(os.remove,
                   sfs.cpjoin(self.base_path, 'active_commit_changes'))
        sfs.ignore(os.remove, sfs.cpjoin(self.base_path,
                                         'active_commit_files'))
        sfs.ignore(os.remove, sfs.cpjoin(self.base_path, 'gc_log'))
        # If rollback is being called, this file should always exist
        os.remove(sfs.cpjoin(self.base_path, 'active_commit'))
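For reference, gc_log is a plain append-only text file with one '<type> <hash>' row per object written during the open commit (see gc_log_item near the end of this section). A hypothetical two-row example and the parse used above:

# Hypothetical gc_log content: a file object plus the commit index object
# written during an interrupted commit (second hash truncated for illustration)
gc_log_contents = (
    'file 9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08\n'
    'commit 4a5b...\n'
)
gc_log_items = [row.split(' ') for row in gc_log_contents.splitlines()]
# -> [['file', '9f86...'], ['commit', '4a5b...']]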
def read_user_lock(repository_path: str):
    try:
        user_lock = file_get_contents(cpjoin(repository_path, 'user_file'))
        if user_lock == '': return None
        return json.loads(user_lock)
    except (IOError, ValueError):
        return None
    def test_storage_move_overwrite_rollback(self):
        """ Test file move rolls back correctly when move overwrites another file """

        s = storage(DATA_DIR, CONF_DIR)
        s.begin()
        s.file_put_contents('hello', b'test content')
        s.file_put_contents('hello2', b'test content 2')
        s.commit(True)
        s.move_file('hello', 'hello2')
        s.rollback()

        self.assertTrue(
            os.path.isfile(cpjoin(DATA_DIR, 'hello')),
            msg='File "hello" does not exist, move overwrite rollback failed')

        self.assertTrue(
            os.path.isfile(cpjoin(DATA_DIR, 'hello2')),
            msg='File "hello2" does not exist, move overwrite rollback failed')
Example #10
    def read_index_object(self, object_hash: str,
                          expected_object_type: str) -> indexObject:
        index_object: indexObject = json.loads(
            sfs.file_get_contents(
                sfs.cpjoin(self.base_path, 'index', object_hash[:2],
                           object_hash[2:])))
        if index_object['type'] != expected_object_type:
            raise IOError('Type of object does not match expected type')
        return index_object
Example #11
    def write_index_object(self, object_type: str, contents: Dict[str, Any]) -> str:
        new_object: indexObject = {'type': object_type}
        new_object.update(contents)  # type: ignore
        serialised = json.dumps(new_object)
        object_hash = hashlib.sha256(bytes(serialised, encoding='utf8')).hexdigest()
        target_base = sfs.cpjoin(self.base_path, 'index', object_hash[:2])
        if os.path.isfile(sfs.cpjoin(target_base, object_hash[2:])):
            return object_hash

        # Log objects which do not already exist so they can be garbage collected on rollback
        self.gc_log_item(object_type, object_hash)

        #----
        sfs.make_dirs_if_dont_exist(target_base)
        sfs.file_put_contents(sfs.cpjoin(target_base, object_hash[2:]),
                              bytes(serialised, encoding='utf8'))
        return object_hash
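The early return makes index objects naturally deduplicated: identical serialised content always produces the same hash, so each object is written at most once. A standalone illustration (note that json.dumps here, as in the code above, is sensitive to key insertion order; sort_keys=True would make the hash order-independent, but the original does not use it):

import hashlib, json

def index_hash(obj: dict) -> str:
    return hashlib.sha256(json.dumps(obj).encode('utf8')).hexdigest()

a = index_hash({'type': 'commit', 'parent': 'root'})
b = index_hash({'type': 'commit', 'parent': 'root'})
assert a == b  # a second write_index_object() call would return early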
def update_user_lock(repository_path: str, session_token: bytes):
    """ Write or clear the user lock file """  # NOTE ALWAYS use within lock access callback

    # While the user lock file should ALWAYS be written only within a lock_access
    # callback, it is sometimes read asynchronously. Because of this updates to
    # the file must be atomic. Write plus move is used to achieve this.
    real_path: str = cpjoin(repository_path, 'user_file')
    tmp_path: str = cpjoin(repository_path, 'new_user_file')

    with open(tmp_path, 'w') as fd2:
        if session_token is None: fd2.write('')
        else:
            fd2.write(
                json.dumps({
                    'session_token': session_token.decode('utf8'),
                    'expires': int(time.time()) + 30
                }))
        fd2.flush()
    os.rename(tmp_path, real_path)
def lock_access(repository_path: str, callback: Callable[[], Responce]):
    """ Synchronise access to the user file between processes, this specifies
    which user is allowed write access at the current time """

    with open(cpjoin(repository_path, 'lock_file'), 'w') as fd:
        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
            returned = callback()
            fcntl.flock(fd, fcntl.LOCK_UN)
            return returned
        except IOError:
            # Note: LOCK_NB makes a held lock raise instead of block; an IOError
            # raised inside callback() is also reported as a lock failure here
            return fail(lock_fail_msg)
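A standalone demonstration of the non-blocking flock used here; LOCK_NB makes flock() raise (IOError is an alias of OSError in Python 3) when another process already holds the lock, instead of blocking. The lock file path is hypothetical:

import fcntl

with open('/tmp/demo_lock_file', 'w') as fd:
    try:
        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        print('lock acquired')  # the protected work would happen here
        fcntl.flock(fd, fcntl.LOCK_UN)
    except IOError:
        print('another process holds the lock')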
def init(unlocked=False):
    global data_store, server_connection, config
    try:
        config = json.loads(
            file_get_contents(
                cpjoin(working_copy_base_path, '.shttpfs',
                       'client_configuration.json')))
    except IOError:
        raise SystemExit('No shttpfs configuration found')
    except ValueError:
        raise SystemExit('Configuration file syntax error')

    # Sanity-check lock: only one client may use the working copy at a time
    try:
        lockfile = open(
            cpjoin(working_copy_base_path, '.shttpfs', 'lock_file'), 'w')
        fcntl.flock(lockfile, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        raise SystemExit('Could not lock working copy')

    #-----------
    ignore_filters: str = file_or_default(
        cpjoin(working_copy_base_path, '.shttpfs_ignore'), b'').decode('utf8')
    pull_ignore_filters: str = file_or_default(
        cpjoin(working_copy_base_path, '.shttpfs_pull_ignore'),
        b'').decode('utf8')

    #-----------
    config['ignore_filters']: List[str] = ['/.shttpfs*'] + ignore_filters.splitlines()
    config['pull_ignore_filters']: List[str] = pull_ignore_filters.splitlines()
    config['data_dir']: str = working_copy_base_path

    if not unlocked:
        config["private_key"] = crypto.unlock_private_key(
            config["private_key"])

    data_store = plain_storage(config['data_dir'])
    server_connection = client_http_request(config['server_domain'])
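For illustration, a hypothetical .shttpfs_ignore file and the filter list it produces; '/.shttpfs*' is always prepended so the client never syncs its own metadata directory:

# Hypothetical ignore file content
ignore_file_text = '/build/*\n*.pyc\n'

ignore_filters = ['/.shttpfs*'] + ignore_file_text.splitlines()
# -> ['/.shttpfs*', '/build/*', '*.pyc']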
def varify_user_lock(repository_path: str, session_token: bytes):
    """ Verify that a returning user has a valid token and their lock has not expired """

    with open(cpjoin(repository_path, 'user_file'), 'r') as fd2:
        content = fd2.read()
        if len(content) == 0: return False
        try:
            res = json.loads(content)
        except ValueError:
            return False
        return (res['session_token'].encode('utf8') == session_token
                and int(time.time()) < int(res['expires']))
    def test_storage_delete_rollback(self):
        """ Test file delete rolls back correctly """

        s = storage(DATA_DIR, CONF_DIR)
        s.begin()
        s.file_put_contents('hello', b'test content')
        s.commit(True)
        s.delete_file('hello')
        s.rollback()

        self.assertTrue(
            os.path.isfile(cpjoin(DATA_DIR, 'hello')),
            msg='File "hello" does not exist, delete rollback failed')
    def test_storage_move_rollback(self):
        """ Test file move rolls back correctly """

        s = storage(DATA_DIR, CONF_DIR)
        s.begin()
        s.file_put_contents('hello', b'test content')
        s.commit(True)
        s.move_file('hello', 'hello2')
        s.rollback()

        self.assertFalse(
            os.path.isfile(cpjoin(DATA_DIR, 'hello2')),
            msg='File "hello2" still exists, move rollback failed')
Example #18
    def commit(self, cont: bool = False):
        """ Finish a transaction """

        if self.journal is None: raise Exception('Must call begin first')

        self.journal.close()  # type: ignore
        self.journal = None
        os.remove(self.j_file)

        for itm in os.listdir(self.tmp_dir):
            os.remove(cpjoin(self.tmp_dir, itm))

        if cont is True: self.begin()
    def test_hash_file(self):
        """ Test that file hash returns the correct result. """

        make_data_dir()

        file_path = cpjoin(DATA_DIR, 'test')
        file_put_contents(file_path, b'some file contents')

        expected_result = 'cf57fcf9d6d7fb8fd7d8c30527c8f51026aa1d99ad77cc769dd0c757d4fe8667'
        result = hash_file(file_path)

        self.assertEqual(expected_result,
                         result,
                         msg='Hashes are not the same')

        delete_data_dir()
def can_aquire_user_lock(repository_path: str, session_token: bytes):
    """ Allow a user to acquire the lock if no other user is currently using it, if the original
    user is returning, presumably after a network error, or if the lock has expired.  """
    # NOTE ALWAYS use within lock access callback

    user_file_path: str = cpjoin(repository_path, 'user_file')
    if not os.path.isfile(user_file_path): return True
    with open(user_file_path, 'r') as fd2:
        content: str = fd2.read()
        if len(content) == 0: return True
        try:
            res = json.loads(content)
        except ValueError:
            return True
        if res['expires'] < int(time.time()): return True
        # The stored token is a JSON string while session_token is bytes, so encode before comparing
        elif res['session_token'].encode('utf8') == session_token: return True
    return False
def have_authenticated_user(client_ip: str, repository: str,
                            session_token: bytes):
    """ check user submitted session token against the db and that ip has not changed """

    if repository not in config['repositories']: return False

    repository_path = config['repositories'][repository]['path']
    conn = auth_db_connect(cpjoin(repository_path, 'auth_transient.db'))

    # Garbage collect session tokens. We must not garbage collect the authentication token of
    # the client which is currently doing a commit: large files can take a long time to upload,
    # and during the upload the lock's expiry is not being updated, so it can lapse. That matters
    # here because the session_tokens table is garbage collected every time a user authenticates.
    # It does not matter if the user lock expires while the client also holds the flock, as it is
    # pushed into the future at the end of the current operation. For this reason we exclude any
    # tokens owned by the client which currently holds the user lock.
    user_lock = read_user_lock(repository_path)
    active_commit = user_lock['session_token'] if user_lock is not None else None

    if active_commit is not None:
        conn.execute(
            "delete from session_tokens where expires < ? and token != ?",
            (time.time(), active_commit))
    else:
        conn.execute("delete from session_tokens where expires < ?",
                     (time.time(), ))

    # Get the session token
    res = conn.execute(
        "select * from session_tokens where token = ? and ip = ?",
        (session_token, client_ip)).fetchall()

    if res != [] and repository in config['users'][res[0]['username']]['uses_repositories']:
        conn.execute(
            "update session_tokens set expires = ? where token = ? and ip = ?",
            (time.time() + extend_session_duration, session_token, client_ip))

        conn.commit()  # to make sure the update and delete have the same view

        return res[0]

    conn.commit()
    return False
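The queries above imply a session_tokens table shaped roughly as follows; the column names come straight from the SQL, but the types and any other columns are assumptions:

import sqlite3

# Hypothetical schema implied by the queries in have_authenticated_user
conn = sqlite3.connect(':memory:')
conn.execute("""
    create table session_tokens (
        token    text,  -- issued session token
        ip       text,  -- client ip recorded at authentication
        username text,  -- looked up in config['users'] above
        expires  real   -- unix timestamp; stale rows are garbage collected
    )""")
conn.row_factory = sqlite3.Row  # enables res[0]['username'] style access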
def begin_auth(request: Request) -> Responce:
    """ Request authentication token to sign """

    repository = request.headers['repository']
    if repository not in config['repositories']: return fail(no_such_repo_msg)

    # ==
    repository_path = config['repositories'][repository]['path']
    conn = auth_db_connect(cpjoin(repository_path, 'auth_transient.db'))
    gc_tokens(conn)

    # Issue a new token
    auth_token = base64.b64encode(pysodium.randombytes(35)).decode('utf-8')
    conn.execute("insert into tokens (expires, token, ip) values (?,?,?)",
                 (time.time() + 30, auth_token, request.remote_addr))
    conn.commit()

    return success({'auth_token': auth_token})
    def test_get_changes_since(self):
        file_put_contents(cpjoin(DATA_DIR, 'test 1'), b'test')
        file_put_contents(cpjoin(DATA_DIR, 'test 2'), b'test 1')
        file_put_contents(cpjoin(DATA_DIR, 'test 3'), b'test 2')

        #==================
        data_store = versioned_storage(DATA_DIR)
        data_store.begin()
        data_store.fs_put_from_file(cpjoin(DATA_DIR, 'test 1'),
                                    {'path': '/test/path'})
        id1 = data_store.commit('test msg', 'test user')

        changes = data_store.get_changes_since('root', data_store.get_head())

        self.assertEqual(
            changes, {
                '/test/path': {
                    'hash':
                    '9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08',
                    'path': '/test/path',
                    'status': 'new'
                }
            })

        #==================
        data_store.begin()
        data_store.fs_put_from_file(cpjoin(DATA_DIR, 'test 2'),
                                    {'path': '/another/path'})
        data_store.fs_put_from_file(cpjoin(DATA_DIR, 'test 3'),
                                    {'path': '/yet/another/path'})
        data_store.commit('test msg', 'test user')

        changes = data_store.get_changes_since(id1, data_store.get_head())

        self.assertEqual(
            changes, {
                '/another/path': {
                    'hash':
                    'f67213b122a5d442d2b93bda8cc45c564a70ec5d2a4e0e95bb585cf199869c98',
                    'path': '/another/path',
                    'status': 'new'
                },
                '/yet/another/path': {
                    'hash':
                    'dec2e4bc4992314a9c9a51bbd859e1b081b74178818c53c19d18d6f761f5d804',
                    'path': '/yet/another/path',
                    'status': 'new'
                }
            })
def pull_file(request: Request) -> Responce:
    """ Get a file from the server """

    session_token = request.headers['session_token'].encode('utf8')
    repository = request.headers['repository']

    #===
    current_user = have_authenticated_user(request.remote_addr, repository,
                                           session_token)
    if current_user is False: return fail(user_auth_fail_msg)

    #===
    data_store = versioned_storage(config['repositories'][repository]['path'])
    file_info = data_store.get_file_info_from_path(request.headers['path'])

    full_file_path: str = cpjoin(
        data_store.get_file_directory_path(file_info['hash']),
        file_info['hash'][2:])
    return success({'file_info_json': json.dumps(file_info)},
                   ServeFile(full_file_path))
    def test_rollback(self):
        file_put_contents(cpjoin(DATA_DIR, 'test 1'), b'test')
        file_put_contents(cpjoin(DATA_DIR, 'test 2'), b'test')
        file_put_contents(cpjoin(DATA_DIR, 'test 3'), b'test 2')

        #==================
        data_store = versioned_storage(DATA_DIR)

        data_store.begin()
        data_store.fs_put_from_file(cpjoin(DATA_DIR, 'test 1'),
                                    {'path': '/test/path'})
        data_store.commit('test msg', 'test user')

        data_store.begin()
        data_store.fs_put_from_file(cpjoin(DATA_DIR, 'test 2'),
                                    {'path': '/another/path'})
        data_store.fs_put_from_file(cpjoin(DATA_DIR, 'test 3'),
                                    {'path': '/yet/another/path'})
        data_store.rollback()

        self.assertEqual(os.listdir(cpjoin(DATA_DIR, 'files')), ['9f'])
        self.assertEqual(
            os.listdir(cpjoin(DATA_DIR, 'files', '9f')),
            ['86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08'])
    def test_storage_multiple_rollback(self):
        """ Test rollback of multiple things at once """

        s = storage(DATA_DIR, CONF_DIR)
        s.begin()
        s.file_put_contents('hello', b'test content')
        s.commit(True)
        s.file_put_contents('hello2', b'test content 2')
        s.file_put_contents('hello3', b'test content 3')
        s.move_file('hello', 'goodbye')
        s.move_file('hello2', 'hello3')
        s.delete_file('hello3')
        s.file_put_contents('hello3', b'something else')
        s.rollback()

        self.assertTrue(
            os.path.isfile(cpjoin(DATA_DIR, 'hello')),
            msg='File "hello" does not exist, multiple rollback failed')

        self.assertFalse(
            os.path.isfile(cpjoin(DATA_DIR, 'hello3')),
            msg='File "hello3" still exists, multiple rollback failed')

        self.assertFalse(
            os.path.isfile(cpjoin(DATA_DIR, 'goodbye')),
            msg='File "goodbye" still exists, multiple rollback failed')

        self.assertTrue(
            os.path.isfile(cpjoin(DATA_DIR, CONF_DIR, BACKUP_DIR, '1_hello3')),
            msg='Backup file "1_hello3" does not exist, multiple rollback failed')

        self.assertTrue(
            os.path.isfile(cpjoin(DATA_DIR, CONF_DIR, BACKUP_DIR, '2_hello3')),
            msg='Backup file "2_hello3" does not exist, multiple rollback failed')

        self.assertTrue(
            os.path.isfile(cpjoin(DATA_DIR, CONF_DIR, BACKUP_DIR, '3_hello2')),
            msg='Backup file "3_hello2" does not exist, multiple rollback failed')
    def __init__(self, data_dir):
        """ Setup and validate file system structure """

        storage.__init__(self, data_dir, '.shttpfs')
        self.manifest_file = cpjoin('.shttpfs', 'manifest.json')
Example #28
    def get_full_file_path(self, *args):
        """ make path relative to DATA DIR from a system relative path """

        return cpjoin(self.data_dir, *args)
Example #29
    def get_file_directory_path(self, file_hash: str) -> str:
        return sfs.cpjoin(self.base_path, 'files', file_hash[:2])
Example #30
    def gc_log_item(self, item_type: str, item_hash: str) -> None:
        with open(sfs.cpjoin(self.base_path, 'gc_log'), 'a') as gc_log:
            gc_log.write(item_type + ' ' + item_hash + '\n')
            gc_log.flush()