def diff_and_update(self, repo_id, commit_id, org_id, users):
        """Diff a commit against its parent and save file-history records.

        The commit is loaded with version 1 first and, if that yields None,
        with version 0.  Nothing is done for commits that cannot be loaded,
        have no parent, have a second parent, or whose parent cannot be
        loaded.  Otherwise the diff between parent and commit is converted to
        file-history records which are saved when ``appconfig.fh.enabled`` is
        set, and ``self._current_commit_position`` is incremented.

        ``org_id`` and ``users`` are accepted but not used by this method.
        """
        # Some properties of the seafile commit object are named differently
        # from the seaobj object, so the commit is loaded from seaobj again.
        commit = commit_mgr.load_commit(repo_id, 1, commit_id)
        if commit is None:
            commit = commit_mgr.load_commit(repo_id, 0, commit_id)
        if commit is not None and commit.parent_id and not commit.second_parent_id:
            parent = commit_mgr.load_commit(repo_id, commit.version,
                                            commit.parent_id)

            if parent is not None:
                differ = CommitDiffer(repo_id, commit.version, parent.root_id,
                                      commit.root_id, True, True)
                added_files, deleted_files, added_dirs, deleted_dirs, modified_files,\
                        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff_to_unicode()

                commit_time = datetime.datetime.utcfromtimestamp(commit.ctime)
                session = scoped_session(self._db_session_class)

                # Close the session even when record generation or saving
                # raises, so a failing commit does not leak it.
                try:
                    if added_files or deleted_files or added_dirs or deleted_dirs or \
                            modified_files or renamed_files or moved_files or renamed_dirs or moved_dirs:
                        records = generate_filehistory_records(
                            added_files, deleted_files, added_dirs, deleted_dirs,
                            modified_files, renamed_files, moved_files,
                            renamed_dirs, moved_dirs, commit, repo_id, parent,
                            commit_time)

                        with mock.patch(
                                'seafevents.events.handlers.save_filehistory',
                                side_effect=save_filehistory):
                            if appconfig.fh.enabled:
                                save_file_histories(session, records)
                finally:
                    session.close()

                self._current_commit_position += 1
Example #2
0
    def diff_2(self):
        """Check CommitDiffer output for two consecutive commit pairs of repo 2.

        First pair (fst_commit -> add_commit): every expected file and folder
        path must appear among the added files / added dirs.
        Second pair (add_commit -> lst_commit): each change category must be
        non-empty and its first entry must carry the expected path.
        """
        repo = self.repo_id_2

        # --- fst_commit -> add_commit -------------------------------------
        newer = commit_mgr.load_commit(repo, self.repo_version,
                                       self.add_commit)
        older = commit_mgr.load_commit(repo, newer.version, self.fst_commit)
        differ = CommitDiffer(repo, newer.version, older.root_id,
                              newer.root_id, True, True)

        (added_files, deleted_files, added_dirs, deleted_dirs,
         modified_files, renamed_files, moved_files, renamed_dirs,
         moved_dirs) = differ.diff()

        expected_files = [
            'create_new_file.md', 'create_renamed_file.md',
            'create_moved_file.md', 'create_deleted_file.md',
            'create_modified_files.md'
        ]
        expected_folders = [
            'create_added_folder', 'create_moved_folder',
            'create_deleted_folder', 'create_renamed_folder'
        ]

        added_file_paths = [entry.path for entry in added_files]
        added_dir_paths = [entry.path for entry in added_dirs]
        for name in expected_files:
            self.assertIn('/' + name, added_file_paths)
        for name in expected_folders:
            self.assertIn('/' + name, added_dir_paths)

        # --- add_commit -> lst_commit -------------------------------------
        newer = commit_mgr.load_commit(repo, self.repo_version,
                                       self.lst_commit)
        older = commit_mgr.load_commit(repo, newer.version, self.add_commit)
        differ = CommitDiffer(repo, newer.version, older.root_id,
                              newer.root_id, True, True)

        (added_files, deleted_files, added_dirs, deleted_dirs,
         modified_files, renamed_files, moved_files, renamed_dirs,
         moved_dirs) = differ.diff()

        # (entries, path expected on the first entry), checked in this order.
        first_entry_checks = (
            (deleted_files, '/create_deleted_file.md'),
            (modified_files, '/create_modified_files.md'),
            (renamed_files, '/create_renamed_file.md'),
            (moved_files, '/create_moved_file.md'),
            (deleted_dirs, '/create_deleted_folder'),
            (renamed_dirs, '/create_renamed_folder'),
            (moved_dirs, '/create_moved_folder'),
        )
        for entries, first_path in first_entry_checks:
            self.assertTrue(len(entries) > 0)
            self.assertEqual(entries[0].path, first_path)
Example #3
0
 def load_commits_2(self):
     """Load the head and last commits of repo 2 and check their metadata."""
     seafcmt = commit_mgr.load_commit(self.repo_id_2, 1, self.head_commit)
     # assertIsInstance reports the actual type on failure, whereas
     # assertTrue(isinstance(...)) only reports "False is not true".
     self.assertIsInstance(seafcmt, SeafCommit)
     self.assertEqual('Renamed directory "create_renamed_folder"', seafcmt.description)
     self.assertEqual('ffc32568c059e9532cb426f19f8138c624c5cdd4', seafcmt.parent_id)
     self.assertEqual('obj_test', seafcmt.repo_name)
     self.assertEqual(1517211913, seafcmt.ctime)

     seafcmt = commit_mgr.load_commit(self.repo_id_2, 1, self.last_commit)
     self.assertEqual('Modified "added_folder.md"', seafcmt.description)
     self.assertEqual('9e4705d102d86756eb8ed9d8d16922ee3212c7c5', seafcmt.parent_id)
     self.assertEqual('obj_test', seafcmt.repo_name)
     self.assertEqual(1517211712, seafcmt.ctime)
Example #4
0
 def load_commits_2(self):
     """Verify description, parent id, repo name and ctime of two commits.

     The head commit is additionally required to be a SeafCommit instance.
     """
     seafcmt = commit_mgr.load_commit(self.repo_id_2, 1, self.head_commit)
     # Dedicated assertion: a failure names the offending type instead of
     # the uninformative "False is not true".
     self.assertIsInstance(seafcmt, SeafCommit)
     self.assertEqual('Renamed directory "create_renamed_folder"',
                      seafcmt.description)
     self.assertEqual('ffc32568c059e9532cb426f19f8138c624c5cdd4',
                      seafcmt.parent_id)
     self.assertEqual('obj_test', seafcmt.repo_name)
     self.assertEqual(1517211913, seafcmt.ctime)

     seafcmt = commit_mgr.load_commit(self.repo_id_2, 1, self.last_commit)
     self.assertEqual('Modified "added_folder.md"', seafcmt.description)
     self.assertEqual('9e4705d102d86756eb8ed9d8d16922ee3212c7c5',
                      seafcmt.parent_id)
     self.assertEqual('obj_test', seafcmt.repo_name)
     self.assertEqual(1517211712, seafcmt.ctime)
Example #5
0
def get_root_dir(repo):
    """
    Return the root directory object of a repo.

    Uses the first entry of ``seafile_api.get_commit_list(repo.id, 0, 1)``
    (presumably the newest commit -- confirm against the API), loads that
    commit and then the directory referenced by its ``root_id``.
    """
    first_entry = seafile_api.get_commit_list(repo.id, 0, 1)[0]
    loaded = commit_mgr.load_commit(repo.id, repo.version, first_entry.id)
    root_dir = fs_mgr.load_seafdir(repo.id, repo.version, loaded.root_id)
    return root_dir
Example #6
0
def get_commit(repo, commit_id=None):
    """
    Load a commit of ``repo``.

    repo: object providing ``id`` and ``version``.
    commit_id: id of the commit to load; when None, the id of the first
        entry of ``seafile_api.get_commit_list(repo.id, 0, 1)`` is used.

    Returns the loaded commit, or None when loading raised an exception
    (the exception is logged, not propagated).
    """
    # Bind the result up front: previously a failure inside the try block
    # left ``commit`` unassigned and the final return raised
    # UnboundLocalError instead of reporting the failure to the caller.
    commit = None
    try:
        if commit_id is None:
            commits = seafile_api.get_commit_list(repo.id, 0, 1)
            commit_id = commits[0].id
        commit = commit_mgr.load_commit(repo.id, repo.version, commit_id)
    except Exception as e:
        # TODO:
        logger.error('exception: {}'.format(e))

    return commit
Example #7
0
def generate_certificate_by_repo(repo):
    """Generate Cared Data Certificate by repo.

    Loads the commit named by the first entry of
    ``seafile_api.get_commit_list(repo.id, 0, 1)`` and passes it, together
    with ``repo``, to ``generate_certificate``.
    """
    first_entry = seafile_api.get_commit_list(repo.id, 0, 1)[0]
    loaded = commit_mgr.load_commit(repo.id, repo.version, first_entry.id)
    return generate_certificate(repo, loaded)
Example #8
0
    def diff_2(self):
        """Check CommitDiffer results for two commit pairs of repo 2.

        fst_commit -> add_commit must report all expected added files and
        folders; add_commit -> lst_commit must report a first entry with the
        expected path in each of the deleted/modified/renamed/moved groups.
        """
        commit = commit_mgr.load_commit(self.repo_id_2, self.repo_version, self.add_commit)
        parent = commit_mgr.load_commit(self.repo_id_2, commit.version, self.fst_commit)
        differ = CommitDiffer(self.repo_id_2, commit.version, parent.root_id, commit.root_id, True, True)

        added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()
        added_file_names = ['create_new_file.md', 'create_renamed_file.md',
                            'create_moved_file.md', 'create_deleted_file.md',
                            'create_modified_files.md']
        added_folder_names = ['create_added_folder', 'create_moved_folder',
                              'create_deleted_folder', 'create_renamed_folder']

        all_files_names = [f.path for f in added_files]
        all_folder_names = [f.path for f in added_dirs]
        for f in added_file_names:
            self.assertIn('/' + f, all_files_names)
        for f in added_folder_names:
            self.assertIn('/' + f, all_folder_names)

        commit = commit_mgr.load_commit(self.repo_id_2, self.repo_version, self.lst_commit)
        parent = commit_mgr.load_commit(self.repo_id_2, commit.version, self.add_commit)
        differ = CommitDiffer(self.repo_id_2, commit.version, parent.root_id, commit.root_id, True, True)

        added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()

        # assertEqual throughout: the assertEquals alias is deprecated and
        # no longer exists in Python 3.12+.
        self.assertTrue(len(deleted_files) > 0)
        self.assertEqual(deleted_files[0].path, '/create_deleted_file.md')

        self.assertTrue(len(modified_files) > 0)
        self.assertEqual(modified_files[0].path, '/create_modified_files.md')

        self.assertTrue(len(renamed_files) > 0)
        self.assertEqual(renamed_files[0].path, '/create_renamed_file.md')

        self.assertTrue(len(moved_files) > 0)
        self.assertEqual(moved_files[0].path, '/create_moved_file.md')

        self.assertTrue(len(deleted_dirs) > 0)
        self.assertEqual(deleted_dirs[0].path, '/create_deleted_folder')

        self.assertTrue(len(renamed_dirs) > 0)
        self.assertEqual(renamed_dirs[0].path, '/create_renamed_folder')

        self.assertTrue(len(moved_dirs) > 0)
        self.assertEqual(moved_dirs[0].path, '/create_moved_folder')
Example #9
0
    def test_multi_backend_read_dir(self):
        """Read the root dir of repo 2 and verify its entries and contents.

        Returns immediately (test passes) when ``commit_mgr`` has no
        ``obj_stores`` attribute.
        """
        if not hasattr(commit_mgr, 'obj_stores'):
            return

        commit = commit_mgr.load_commit(self.repo_id_2, self.repo_version,
                                        self.commit_id)
        root_dir = fs_mgr.load_seafdir(self.repo_id_2, self.repo_version,
                                       commit.root_id)

        self.assertEqual(len(root_dir.get_files_list()), 3)
        self.assertEqual(len(root_dir.get_subdirs_list()), 2)

        dir_a = root_dir.lookup('folder1')
        self.assertIsNotNone(dir_a)

        dir_b = root_dir.lookup('第二个中文目录')
        self.assertIsNotNone(dir_b)

        dir_x = root_dir.lookup('not.exist')
        self.assertIsNone(dir_x)

        file_a = root_dir.lookup('a.md')
        self.assertIsNotNone(file_a)
        self.assertEqual(file_a.size, 10)
        content = file_a.get_content()
        self.assertEqual(content, b'hello a.md')

        file_b = root_dir.lookup('一张照片.jpg')
        self.assertIsNotNone(file_b)
        # Was assertTrue(file_b.size, 155067): that passes 155067 as the
        # failure message and never compares the size.
        self.assertEqual(file_b.size, 155067)

        # Test read file more than 1 blocks
        file_c = root_dir.lookup('glib.zip')
        self.assertIsNotNone(file_c)
        self.assertEqual(file_c.size, 3345765)
        content = file_c.get_content()
        reference_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'data', 'glib.zip')
        with open(reference_path, 'rb') as fp:
            content_r = fp.read()
        self.assertEqual(content, content_r)

        # Test stream read
        stream = file_c.get_stream()
        data = b''
        chunk_size = file_c.size // 5
        try:
            for i in range(5):
                data += stream.read(chunk_size)
                self.assertEqual(len(data), (i + 1) * chunk_size)
                self.assertEqual(data, content[:len(data)])
        finally:
            # Close the stream even when one of the assertions above fails.
            stream.close()

        self.assertEqual(data, content)

        file_x = root_dir.lookup('not.exist')
        self.assertIsNone(file_x)
Example #10
0
    def update_files_index(self, repo_id, old_commit_id, new_commit_id):
        """Bring the files index of a repo from one commit to another.

        Does nothing when both commit ids are equal.  A missing old commit is
        tolerated (logged at debug level, diff is then taken against a None
        root); a missing new commit is logged as a warning and aborts the
        update.  The repo name index is always refreshed for the new root;
        file and dir entries are only touched when the root id changed.
        """
        if old_commit_id == new_commit_id:
            return

        previous_root = None
        if old_commit_id:
            try:
                previous_root = commit_mgr.load_commit(
                    repo_id, 0, old_commit_id).root_id
            except GetObjectError as err:
                # previous_root keeps its None default in this case
                logger.debug(err)

        try:
            head = commit_mgr.load_commit(repo_id, 0, new_commit_id)
        except GetObjectError as err:
            # new commit should exists in the obj store
            logger.warning(err)
            return

        current_root = head.root_id
        repo_version = head.get_version()
        index = self.files_index

        index.update_repo_name_index(repo_id, repo_version, current_root)

        if previous_root == current_root:
            return

        differ = CommitDiffer(repo_id, repo_version, previous_root, current_root)
        (new_files, gone_files, new_dirs, gone_dirs,
         changed_files) = differ.diff(head.ctime)

        # if inrecovery:
        #     added_files = filter(lambda x:not es_check_exist(es, repo_id, x), added_files)

        # total_changed = sum(map(len, [added_files, deleted_files, deleted_dirs, modified_files]))
        # if total_changed > 10000:
        #     logger.warning('skip large changeset: %s files(%s)', total_changed, repo_id)
        #     return

        index.add_files(repo_id, repo_version, new_files)
        index.delete_files(repo_id, gone_files)
        index.add_dirs(repo_id, repo_version, new_dirs)
        index.delete_dirs(repo_id, gone_dirs)
        index.update_files(repo_id, repo_version, changed_files)
Example #11
0
    def test_multi_backend_read_dir(self):
        """Read the root dir of repo 2 and verify its entries and contents.

        Returns immediately (test passes) when ``commit_mgr`` has no
        ``obj_stores`` attribute.  Written to run unchanged on Python 2 and
        Python 3 (bytes literals, floor division, ``range``).
        """
        try:
            commit_mgr.obj_stores
        except AttributeError:
            return

        commit = commit_mgr.load_commit(self.repo_id_2, self.repo_version, self.commit_id)
        root_dir = fs_mgr.load_seafdir(self.repo_id_2, self.repo_version, commit.root_id)

        self.assertEqual(len(root_dir.get_files_list()), 3)
        self.assertEqual(len(root_dir.get_subdirs_list()), 2)

        dir_a = root_dir.lookup('folder1')
        self.assertIsNotNone(dir_a)

        dir_b = root_dir.lookup('第二个中文目录')
        self.assertIsNotNone(dir_b)

        dir_x = root_dir.lookup('not.exist')
        self.assertIsNone(dir_x)

        file_a = root_dir.lookup('a.md')
        self.assertIsNotNone(file_a)
        self.assertEqual(file_a.size, 10)
        content = file_a.get_content()
        self.assertEqual(content, b'hello a.md')

        file_b = root_dir.lookup('一张照片.jpg')
        self.assertIsNotNone(file_b)
        # Was assertTrue(file_b.size, 155067): the second argument is the
        # failure message there, so the size was never actually compared.
        self.assertEqual(file_b.size, 155067)

        # Test read file more than 1 blocks
        file_c = root_dir.lookup('glib.zip')
        self.assertIsNotNone(file_c)
        self.assertEqual(file_c.size, 3345765)
        content = file_c.get_content()
        reference_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'data', 'glib.zip')
        with open(reference_path, 'rb') as fp:
            content_r = fp.read()
        self.assertEqual(content, content_r)

        # Test stream read
        stream = file_c.get_stream()
        data = b''
        # Floor division keeps the chunk size an int on Python 3 as well.
        chunk_size = file_c.size // 5
        try:
            for i in range(5):
                data += stream.read(chunk_size)
                self.assertEqual(len(data), (i + 1) * chunk_size)
                self.assertEqual(data, content[:len(data)])
        finally:
            # Close the stream even when one of the assertions above fails.
            stream.close()

        self.assertEqual(data, content)

        file_x = root_dir.lookup('not.exist')
        self.assertIsNone(file_x)
Example #12
0
    def test_read_dir(self):
        """Read the repo's root dir and verify its entries and file contents.

        Written to run unchanged on Python 2 and Python 3 (bytes literals,
        floor division, ``range``).
        """
        commit = commit_mgr.load_commit(self.repo_id, self.repo_version,
                                        self.commit_id)
        root_dir = fs_mgr.load_seafdir(self.repo_id, self.repo_version,
                                       commit.root_id)

        # Debug aid kept from the original: dump the directory entries.
        import pprint
        pprint.pprint(root_dir.dirents)

        self.assertEqual(len(root_dir.get_files_list()), 3)
        self.assertEqual(len(root_dir.get_subdirs_list()), 2)

        dir_a = root_dir.lookup('folder1')
        self.assertIsNotNone(dir_a)

        dir_b = root_dir.lookup('第二个中文目录')
        self.assertIsNotNone(dir_b)

        dir_x = root_dir.lookup('not.exist')
        self.assertIsNone(dir_x)

        file_a = root_dir.lookup('a.md')
        self.assertIsNotNone(file_a)
        self.assertEqual(file_a.size, 10)
        content = file_a.get_content()
        self.assertEqual(content, b'hello a.md')

        file_b = root_dir.lookup('一张照片.jpg')
        self.assertIsNotNone(file_b)
        # Was assertTrue(file_b.size, 155067): the second argument is the
        # failure message there, so the size was never actually compared.
        self.assertEqual(file_b.size, 155067)

        # Test read file more than 1 blocks
        file_c = root_dir.lookup('glib.zip')
        self.assertIsNotNone(file_c)
        self.assertEqual(file_c.size, 3345765)
        content = file_c.get_content()
        with open(os.path.join(data_dir, 'glib.zip'), 'rb') as fp:
            content_r = fp.read()
        self.assertEqual(content, content_r)

        # Test stream read
        stream = file_c.get_stream()
        data = b''
        # Floor division keeps the chunk size an int on Python 3 as well.
        chunk_size = file_c.size // 5
        try:
            for i in range(5):
                data += stream.read(chunk_size)
                self.assertEqual(len(data), (i + 1) * chunk_size)
                self.assertEqual(data, content[:len(data)])
        finally:
            # Close the stream even when one of the assertions above fails.
            stream.close()

        self.assertEqual(data, content)

        file_x = root_dir.lookup('not.exist')
        self.assertIsNone(file_x)
Example #13
0
    def test_read_dir(self):
        """Load the root directory of the repo and check listing and reads.

        Covers entry counts, lookups of existing and missing names, whole-file
        reads and chunked stream reads.  Runs on both Python 2 and Python 3.
        """
        commit = commit_mgr.load_commit(self.repo_id, self.repo_version, self.commit_id)
        root_dir = fs_mgr.load_seafdir(self.repo_id, self.repo_version, commit.root_id)

        # Debug aid kept from the original: dump the directory entries.
        import pprint
        pprint.pprint(root_dir.dirents)

        self.assertEqual(len(root_dir.get_files_list()), 3)
        self.assertEqual(len(root_dir.get_subdirs_list()), 2)

        dir_a = root_dir.lookup('folder1')
        self.assertIsNotNone(dir_a)

        dir_b = root_dir.lookup('第二个中文目录')
        self.assertIsNotNone(dir_b)

        dir_x = root_dir.lookup('not.exist')
        self.assertIsNone(dir_x)

        file_a = root_dir.lookup('a.md')
        self.assertIsNotNone(file_a)
        self.assertEqual(file_a.size, 10)
        content = file_a.get_content()
        self.assertEqual(content, b'hello a.md')

        file_b = root_dir.lookup('一张照片.jpg')
        self.assertIsNotNone(file_b)
        # assertTrue(x, 155067) treated 155067 as the failure message and
        # checked nothing but truthiness; compare the size for real.
        self.assertEqual(file_b.size, 155067)

        # Test read file more than 1 blocks
        file_c = root_dir.lookup('glib.zip')
        self.assertIsNotNone(file_c)
        self.assertEqual(file_c.size, 3345765)
        content = file_c.get_content()
        with open(os.path.join(data_dir, 'glib.zip'), 'rb') as fp:
            content_r = fp.read()
        self.assertEqual(content, content_r)

        # Test stream read
        stream = file_c.get_stream()
        data = b''
        # '//' yields an int chunk size on Python 3 too ('/' would be float).
        chunk_size = file_c.size // 5
        try:
            for i in range(5):
                data += stream.read(chunk_size)
                self.assertEqual(len(data), (i + 1) * chunk_size)
                self.assertEqual(data, content[:len(data)])
        finally:
            # Release the stream even if an assertion in the loop fails.
            stream.close()

        self.assertEqual(data, content)

        file_x = root_dir.lookup('not.exist')
        self.assertIsNone(file_x)
Example #14
0
    def test_diff_fold_dirs(self):
        """Diff commit 3 against the all-zero root id with and without folding.

        The last CommitDiffer argument is True for the "fold dirs" run and
        False for the "don't fold dirs" run; each run's added file and dir
        paths must equal the expected lists, in order.
        """
        commit = commit_mgr.load_commit(self.repo_id3, self.repo_version,
                                        self.commit_id3)
        empty_root = '0000000000000000000000000000000000000000'

        # fold dirs #
        differ = CommitDiffer(self.repo_id3, commit.version, empty_root,
                              commit.root_id, True, True)
        added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()

        added_file_names = ['/file1']
        added_dir_names = ['/folder1']

        all_files_names = [f.path for f in added_files]
        all_dirs_names = [f.path for f in added_dirs]

        # One list comparison checks length and every element, and on failure
        # prints the differing items instead of "False is not true".
        self.assertEqual(added_file_names, all_files_names)
        self.assertEqual(added_dir_names, all_dirs_names)

        # don't fold dirs #
        differ = CommitDiffer(self.repo_id3, commit.version, empty_root,
                              commit.root_id, True, False)
        added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()

        added_file_names = ['/file1', '/folder1/folder2/file3']
        added_dir_names = [
            '/folder1', '/folder1/folder2', '/folder1/folder2/foder3'
        ]

        all_files_names = [f.path for f in added_files]
        all_dirs_names = [f.path for f in added_dirs]
        self.assertEqual(added_file_names, all_files_names)
        self.assertEqual(added_dir_names, all_dirs_names)
Example #15
0
def get_blocks(repo_id, fname, commit_id=None):
    """Print out blocks of file for repo and commit
        repo_id: repo id
        fname: name looked up in the root directory of the commit
        commit_id: commit id; when empty, the id of the first entry of the
            repo's commit list is used

    Every print call receives a single pre-formatted string so the function
    emits the same text under Python 2 (print statement) and Python 3
    (print function).
    """
    repo = get_repo(repo_id)
    commits = seafile_api.get_commit_list(repo.id, 0, MAX_INT)

    print("commits: %s" % ([(c.id, c.ctime) for c in commits],))

    commit_id = commit_id or commits[0].id
    commit = commit_mgr.load_commit(repo.id, repo.version, commit_id)

    root_dir = fs_mgr.load_seafdir(repo.id, repo.version, commit.root_id)

    entry = root_dir.lookup(fname)
    print("File: %s, commit id: %s, root_id: %s" % (fname, commit_id,
                                                    commit.root_id))

    if entry:
        # Two spaces after the colon reproduce the original output, where the
        # print statement added a separator after the literal "blocks: ".
        print("blocks:  %s" % (entry.blocks,))
    else:
        print("No file for this commit!")
Example #16
0
    def test_diff_fold_dirs(self):
        """Compare added files/dirs reported with dir folding on and off.

        Both runs diff commit 3 against the all-zero root id; only the last
        CommitDiffer argument differs (True = fold dirs, False = don't).
        """
        commit = commit_mgr.load_commit(self.repo_id3, self.repo_version, self.commit_id3)

        # fold dirs #
        differ = CommitDiffer(self.repo_id3, commit.version, '0000000000000000000000000000000000000000', commit.root_id, True, True)
        added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()

        added_file_names = ['/file1']
        added_dir_names = ['/folder1']

        all_files_names = [f.path for f in added_files]
        all_dirs_names = [f.path for f in added_dirs]

        # Comparing the lists directly covers both the length check and the
        # element-wise check, and yields a readable diff on failure.
        self.assertEqual(added_file_names, all_files_names)
        self.assertEqual(added_dir_names, all_dirs_names)

        # don't fold dirs #
        differ = CommitDiffer(self.repo_id3, commit.version, '0000000000000000000000000000000000000000', commit.root_id, True, False)
        added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()

        added_file_names = ['/file1', '/folder1/folder2/file3']
        added_dir_names = ['/folder1', '/folder1/folder2', '/folder1/folder2/foder3']

        all_files_names = [f.path for f in added_files]
        all_dirs_names = [f.path for f in added_dirs]
        self.assertEqual(added_file_names, all_files_names)
        self.assertEqual(added_dir_names, all_dirs_names)
Example #17
0
def RepoUpdateEventHandler(session, msg):
    """Process a repo-update message.

    ``msg['content']`` must be three tab-separated fields; the second is the
    repo id and the third the commit id.  ``msg['ctime']`` is a timestamp
    converted with ``utcfromtimestamp``.

    For a loadable commit with exactly one parent the handler
      * diffs the commit against its parent,
      * updates DB records for renamed/moved files and dirs,
      * saves file-history records (when ``appconfig.fh.enabled``) and user
        activity records, or a repo-rename activity when the diff is empty,
      * creates/updates the keeper catalog entry for the repo,
      * notifies the collab server when ``appconfig.enable_collab_server``.

    Returns None in every case; malformed messages are logged and ignored.
    """
    elements = msg['content'].split('\t')
    if len(elements) != 3:
        logging.warning("got bad message: %s", elements)
        return

    repo_id = elements[1]
    commit_id = elements[2]

    commit = commit_mgr.load_commit(repo_id, 1, commit_id)
    if commit is None:
        commit = commit_mgr.load_commit(repo_id, 0, commit_id)

    # TODO: maybe handle merge commit.
    if commit is None or not commit.parent_id or commit.second_parent_id:
        return

    parent = commit_mgr.load_commit(repo_id, commit.version, commit.parent_id)
    if parent is None:
        return

    differ = CommitDiffer(repo_id, commit.version, parent.root_id, commit.root_id,
                          True, True)
    added_files, deleted_files, added_dirs, deleted_dirs, modified_files,\
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()

    if renamed_files or renamed_dirs or moved_files or moved_dirs:
        changer = ChangeFilePathHandler()
        # Same order and same last argument as the four separate loops this
        # replaces: 0 is passed for file entries and 1 for dir entries.
        path_changes = (
            (renamed_files, 0),
            (renamed_dirs, 1),
            (moved_files, 0),
            (moved_dirs, 1),
        )
        try:
            for entries, kind in path_changes:
                for entry in entries:
                    changer.update_db_records(repo_id, entry.path, entry.new_path, kind)
        finally:
            # Release the handler's session even when an update raises.
            changer.close_session()

    org_id = get_org_id_by_repo_id(repo_id)
    if org_id > 0:
        users = seafile_api.org_get_shared_users_by_repo(org_id, repo_id)
        owner = seafile_api.get_org_repo_owner(repo_id)
    else:
        users = seafile_api.get_shared_users_by_repo(repo_id)
        owner = seafile_api.get_repo_owner(repo_id)

    if owner not in users:
        users = users + [owner]
    if not users:
        return

    time = datetime.datetime.utcfromtimestamp(msg['ctime'])
    if added_files or deleted_files or added_dirs or deleted_dirs or \
            modified_files or renamed_files or moved_files or renamed_dirs or moved_dirs:

        if appconfig.fh.enabled:
            records = generate_filehistory_records(added_files, deleted_files,
                            added_dirs, deleted_dirs, modified_files, renamed_files,
                            moved_files, renamed_dirs, moved_dirs, commit, repo_id,
                            parent, time)
            save_file_histories(session, records)

        records = generate_activity_records(added_files, deleted_files,
                added_dirs, deleted_dirs, modified_files, renamed_files,
                moved_files, renamed_dirs, moved_dirs, commit, repo_id,
                parent, users, time)

        save_user_activities(session, records)
    else:
        # Empty diff: recorded as a repo-rename activity.
        save_repo_rename_activity(session, commit, repo_id, parent, org_id, users, time)

    # TODO check: catalog entry update
    # KEEPER
    # Arguments are handed to logging so formatting is deferred until the
    # record is actually emitted; the resulting messages are unchanged.
    logging.info("REPO UPDATED EVENT repo_id: %s", repo_id)
    logging.info("Trying to create/update keeper catalog entry for repo_id: %s...", repo_id)
    if generate_catalog_entry_by_repo_id(repo_id):
        logging.info("Success!")
    else:
        logging.error("Something went wrong...")

    if appconfig.enable_collab_server:
        send_message_to_collab_server(repo_id)
Example #18
0
def RepoUpdateEventHandler(session, msg):
    """Record file-history and user-activity entries for a repo-update message.

    ``msg.body`` must hold exactly three tab-separated fields; the second is
    used as the repo id and the third as the commit id. The commit is diffed
    against its first parent and the resulting changes are persisted through
    ``session``. Nothing is recorded when the message is malformed, when the
    commit or its parent cannot be loaded, when the commit has no parent, or
    when it is a merge commit (``second_parent_id`` set).

    Args:
        session: database session handed to the ``save_*`` helpers.
        msg: event message exposing ``body`` (tab-separated text) and
            ``ctime`` (passed to ``datetime.utcfromtimestamp``).

    Returns:
        None.
    """
    elements = msg.body.split('\t')
    if len(elements) != 3:
        logging.warning("got bad message: %s", elements)
        return

    repo_id, commit_id = elements[1], elements[2]
    # Only byte strings need decoding. On Python 2 ``bytes`` is ``str``, so
    # this is the same check as before; on Python 3 text has no ``decode``
    # and the old ``isinstance(..., str)`` test raised AttributeError.
    if isinstance(repo_id, bytes):
        repo_id = repo_id.decode('utf8')
    if isinstance(commit_id, bytes):
        commit_id = commit_id.decode('utf8')

    # Try version 1 first, then fall back to version 0.
    commit = commit_mgr.load_commit(repo_id, 1, commit_id)
    if commit is None:
        commit = commit_mgr.load_commit(repo_id, 0, commit_id)

    # TODO: maybe handle merge commit.
    if commit is None or not commit.parent_id or commit.second_parent_id:
        return

    parent = commit_mgr.load_commit(repo_id, commit.version, commit.parent_id)
    if parent is None:
        return

    differ = CommitDiffer(repo_id, commit.version, parent.root_id, commit.root_id,
                          True, True)
    (added_files, deleted_files, added_dirs, deleted_dirs, modified_files,
     renamed_files, moved_files, renamed_dirs, moved_dirs) = differ.diff_to_unicode()

    if renamed_files or renamed_dirs or moved_files or moved_dirs:
        changer = ChangeFilePathHandler()
        try:
            # The last argument of update_db_records is 0 for files and 1
            # for directories; the group order matches the original code.
            for entries, is_dir in ((renamed_files, 0), (renamed_dirs, 1),
                                    (moved_files, 0), (moved_dirs, 1)):
                for entry in entries:
                    changer.update_db_records(repo_id, entry.path,
                                              entry.new_path, is_dir)
        finally:
            # Release the handler's session even if an update fails.
            changer.close_session()

    org_id = get_org_id_by_repo_id(repo_id)
    if org_id > 0:
        users = seafile_api.org_get_shared_users_by_repo(org_id, repo_id)
        owner = seafile_api.get_org_repo_owner(repo_id)
    else:
        users = seafile_api.get_shared_users_by_repo(repo_id)
        owner = seafile_api.get_repo_owner(repo_id)

    # Add the owner only when one was returned. Appending a missing owner
    # (None / empty) used to make ``users`` non-empty unconditionally, which
    # left the guard below unreachable.
    if owner and owner not in users:
        users = users + [owner]
    if not users:
        return

    event_time = datetime.datetime.utcfromtimestamp(msg.ctime)
    has_changes = any((added_files, deleted_files, added_dirs, deleted_dirs,
                       modified_files, renamed_files, moved_files,
                       renamed_dirs, moved_dirs))
    if not has_changes:
        # Empty diff: presumably a repo-level change such as a rename.
        save_repo_rename_activity(session, commit, repo_id, parent, org_id,
                                  users, event_time)
        return

    if appconfig.fh.enabled:
        history_records = generate_filehistory_records(
            added_files, deleted_files, added_dirs, deleted_dirs,
            modified_files, renamed_files, moved_files, renamed_dirs,
            moved_dirs, commit, repo_id, parent, event_time)
        save_file_histories(session, history_records)

    activity_records = generate_activity_records(
        added_files, deleted_files, added_dirs, deleted_dirs, modified_files,
        renamed_files, moved_files, renamed_dirs, moved_dirs, commit, repo_id,
        parent, users, event_time)
    save_user_activities(session, activity_records)
Example #19
0
    def diff_and_scan_content(self, task, client):
        """Scan the files changed between two commits and persist the results.

        When ``task.last_commit_id`` equals ``task.new_commit_id`` only the
        timestamp of the matching ``ContentScanRecord`` is refreshed.
        Otherwise the two commits are diffed, existing ``ContentScanResult``
        rows are renamed or deleted to follow the diff, every added or
        modified file accepted by ``self.should_scan_file`` is passed to
        ``client.scan``, each non-empty dict result is stored as a new
        ``ContentScanResult``, and the repo's ``ContentScanRecord`` is moved
        to the new commit.

        Args:
            task: object exposing ``repo_id``, ``last_commit_id`` and
                ``new_commit_id``; a falsy ``last_commit_id`` means the diff
                is taken against ``ZERO_OBJ_ID``.
            client: scanner exposing ``scan(content)``.

        Returns:
            None.
        """
        repo_id = task.repo_id
        last_commit_id = task.last_commit_id
        new_commit_id = task.new_commit_id
        edb_session = appconfig.session_cls()

        # try/finally guarantees the session is closed on every exit path,
        # including exceptions raised while diffing or scanning.
        try:
            # repo not changed, update timestamp
            if last_commit_id == new_commit_id:
                q = edb_session.query(ContentScanRecord)
                q = q.filter(ContentScanRecord.repo_id==repo_id,
                             ContentScanRecord.commit_id==last_commit_id)
                q.update({"timestamp": self.dt})
                edb_session.commit()
                return

            # diff: try version 1 first, then fall back to version 0
            version = 1
            new_commit = commit_mgr.load_commit(repo_id, version, new_commit_id)
            if new_commit is None:
                version = 0
                new_commit = commit_mgr.load_commit(repo_id, version, new_commit_id)
            if not new_commit:
                logging.warning('Failed to load commit %s/%s', repo_id, new_commit_id)
                return

            last_commit = None
            if last_commit_id:
                last_commit = commit_mgr.load_commit(repo_id, version, last_commit_id)
                if not last_commit:
                    logging.warning('Failed to load commit %s/%s', repo_id, last_commit_id)
                    return
            new_root_id = new_commit.root_id
            last_root_id = last_commit.root_id if last_commit else ZERO_OBJ_ID

            differ = CommitDiffer(repo_id, version, last_root_id, new_root_id,
                                  True, False)
            (added_files, deleted_files, added_dirs, deleted_dirs, modified_files,
             renamed_files, moved_files, renamed_dirs, moved_dirs) = differ.diff_to_unicode()

            # Handle renamed, moved and deleted files.
            q = edb_session.query(ContentScanResult).filter(ContentScanResult.repo_id==repo_id)
            results = q.all()
            if results:
                path_pairs_to_rename = []
                # Renamed dirs, then moved dirs: every recorded path under
                # the old directory gets the new directory as its prefix.
                for dir_group in (renamed_dirs, moved_dirs):
                    for changed_dir in dir_group:
                        old_prefix = changed_dir.path + '/'
                        for row in results:
                            if row.path.startswith(old_prefix):
                                new_path = (changed_dir.new_path + '/' +
                                            row.path[len(old_prefix):])
                                path_pairs_to_rename.append((row.path, new_path))
                # Renamed files, then moved files: exact path matches only.
                for file_group in (renamed_files, moved_files):
                    for changed_file in file_group:
                        for row in results:
                            if changed_file.path == row.path:
                                path_pairs_to_rename.append(
                                    (row.path, changed_file.new_path))

                for old_path, new_path in path_pairs_to_rename:
                    q = edb_session.query(ContentScanResult)
                    q = q.filter(ContentScanResult.repo_id==repo_id,
                                 ContentScanResult.path==old_path)
                    q.update({"path": new_path})

                # Deleted files lose their records. Modified files are
                # scanned and re-recorded below, so their previous records
                # are dropped now as well.
                paths_to_delete = []
                for file_group in (deleted_files, modified_files):
                    for stale_file in file_group:
                        for row in results:
                            if stale_file.path == row.path:
                                paths_to_delete.append(row.path)

                for path in paths_to_delete:
                    q = edb_session.query(ContentScanResult)
                    q = q.filter(ContentScanResult.repo_id==repo_id,
                                 ContentScanResult.path==path)
                    q.delete()

                edb_session.commit()

            # scan added_files and modified_files by third-party API.
            files_to_scan = list(added_files)
            files_to_scan.extend(modified_files)
            scan_results = []
            for f in files_to_scan:
                if not self.should_scan_file(f.path, f.size):
                    continue
                # Load with the version the commit was loaded with (the same
                # one given to CommitDiffer) instead of a hard-coded 1.
                seafile_obj = fs_mgr.load_seafile(repo_id, version, f.obj_id)
                content = seafile_obj.get_content()
                if not content:
                    continue
                result = client.scan(content)
                if result and isinstance(result, dict):
                    scan_results.append({"path": f.path, "detail": result})
                else:
                    logging.warning('Failed to scan %s:%s', repo_id, f.path)

            for item in scan_results:
                detail = json.dumps(item["detail"])
                new_record = ContentScanResult(repo_id, item["path"],
                                               appconfig.platform, detail)
                edb_session.add(new_record)
            if scan_results:
                logging.info('Found %d new illegal files.', len(scan_results))

            # Update ContentScanRecord
            if last_commit_id:
                q = edb_session.query(ContentScanRecord).filter(ContentScanRecord.repo_id==repo_id)
                q.update({"commit_id": new_commit_id, "timestamp": self.dt})
            else:
                new_record = ContentScanRecord(repo_id, new_commit_id, self.dt)
                edb_session.add(new_record)

            edb_session.commit()
        finally:
            edb_session.close()
Example #20
0
def get_latest_commit_root_id(repo):
    """Return the ``root_id`` of the first commit listed for ``repo``.

    The commit list is requested from ``seafile_api.get_commit_list`` with
    the arguments ``(repo.id, 0, 1)`` and its first entry is loaded through
    ``commit_mgr`` using ``repo.version``.
    """
    repo_id = repo.id
    head = seafile_api.get_commit_list(repo_id, 0, 1)[0]
    return commit_mgr.load_commit(repo_id, repo.version, head.id).root_id