def diff_and_update(self, repo_id, commit_id, org_id, users):
    """Diff one commit against its parent and persist file-history records.

    NOTE(review): reconstructed from whitespace-mangled source; verify the
    indentation of ``session.close()`` and the trailing position increment
    against the original file.
    """
    # cause some of the properties of the seafile commit object have different names
    # than the seaobj object, so take commit from seaobj again
    commit = commit_mgr.load_commit(repo_id, 1, commit_id)
    if commit is None:
        # Fall back to the legacy commit storage version.
        commit = commit_mgr.load_commit(repo_id, 0, commit_id)
    # Only handle non-merge commits: a second parent marks a merge.
    if commit is not None and commit.parent_id and not commit.second_parent_id:
        parent = commit_mgr.load_commit(repo_id, commit.version, commit.parent_id)
        if parent is not None:
            differ = CommitDiffer(repo_id, commit.version, parent.root_id, commit.root_id,
                                  True, True)
            added_files, deleted_files, added_dirs, deleted_dirs, modified_files,\
                renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff_to_unicode()
            time = datetime.datetime.utcfromtimestamp(commit.ctime)
            session = scoped_session(self._db_session_class)
            if added_files or deleted_files or added_dirs or deleted_dirs or \
                    modified_files or renamed_files or moved_files or renamed_dirs or moved_dirs:
                records = generate_filehistory_records(
                    added_files, deleted_files, added_dirs, deleted_dirs, modified_files,
                    renamed_files, moved_files, renamed_dirs, moved_dirs, commit, repo_id,
                    parent, time)
                # Patch save_filehistory so the real handler path is exercised
                # while its side effect stays observable to the test.
                with mock.patch('seafevents.events.handlers.save_filehistory',
                                side_effect=save_filehistory):
                    if appconfig.fh.enabled:
                        save_file_histories(session, records)
            session.close()
    # Advance the replay cursor regardless of whether this commit was diffed.
    self._current_commit_position += 1
def diff_2(self):
    """Diff the 'add' commit against the first commit, then the last commit
    against the 'add' commit, and check every reported change category."""
    head = commit_mgr.load_commit(self.repo_id_2, self.repo_version, self.add_commit)
    base = commit_mgr.load_commit(self.repo_id_2, head.version, self.fst_commit)
    differ = CommitDiffer(self.repo_id_2, head.version, base.root_id, head.root_id,
                          True, True)
    (added_files, deleted_files, added_dirs, deleted_dirs, modified_files,
     renamed_files, moved_files, renamed_dirs, moved_dirs) = differ.diff()

    expected_files = [
        'create_new_file.md',
        'create_renamed_file.md',
        'create_moved_file.md',
        'create_deleted_file.md',
        'create_modified_files.md',
    ]
    expected_dirs = [
        'create_added_folder',
        'create_moved_folder',
        'create_deleted_folder',
        'create_renamed_folder',
    ]
    file_paths = [entry.path for entry in added_files]
    dir_paths = [entry.path for entry in added_dirs]
    for name in expected_files:
        self.assertIn('/' + name, file_paths)
    for name in expected_dirs:
        self.assertIn('/' + name, dir_paths)

    # Second diff: last commit vs the 'add' commit.
    head = commit_mgr.load_commit(self.repo_id_2, self.repo_version, self.lst_commit)
    base = commit_mgr.load_commit(self.repo_id_2, head.version, self.add_commit)
    differ = CommitDiffer(self.repo_id_2, head.version, base.root_id, head.root_id,
                          True, True)
    (added_files, deleted_files, added_dirs, deleted_dirs, modified_files,
     renamed_files, moved_files, renamed_dirs, moved_dirs) = differ.diff()

    self.assertTrue(len(deleted_files) > 0)
    self.assertEqual(deleted_files[0].path, '/create_deleted_file.md')
    self.assertTrue(len(modified_files) > 0)
    self.assertEqual(modified_files[0].path, '/create_modified_files.md')
    self.assertTrue(len(renamed_files) > 0)
    self.assertEqual(renamed_files[0].path, '/create_renamed_file.md')
    self.assertTrue(len(moved_files) > 0)
    self.assertEqual(moved_files[0].path, '/create_moved_file.md')
    self.assertTrue(len(deleted_dirs) > 0)
    self.assertEqual(deleted_dirs[0].path, '/create_deleted_folder')
    self.assertTrue(len(renamed_dirs) > 0)
    self.assertEqual(renamed_dirs[0].path, '/create_renamed_folder')
    self.assertTrue(len(moved_dirs) > 0)
    self.assertEqual(moved_dirs[0].path, '/create_moved_folder')
def load_commits_2(self):
    """Load two known commits from repo 2 and verify their metadata."""
    loaded = commit_mgr.load_commit(self.repo_id_2, 1, self.head_commit)
    self.assertTrue(isinstance(loaded, SeafCommit))
    self.assertEqual('Renamed directory "create_renamed_folder"', loaded.description)
    self.assertEqual('ffc32568c059e9532cb426f19f8138c624c5cdd4', loaded.parent_id)
    self.assertEqual('obj_test', loaded.repo_name)
    self.assertEqual(1517211913, loaded.ctime)

    loaded = commit_mgr.load_commit(self.repo_id_2, 1, self.last_commit)
    self.assertEqual('Modified "added_folder.md"', loaded.description)
    self.assertEqual('9e4705d102d86756eb8ed9d8d16922ee3212c7c5', loaded.parent_id)
    self.assertEqual('obj_test', loaded.repo_name)
    self.assertEqual(1517211712, loaded.ctime)
def get_root_dir(repo):
    """Return the SeafDir for the root of *repo*'s latest commit."""
    latest = seafile_api.get_commit_list(repo.id, 0, 1)[0]
    head = commit_mgr.load_commit(repo.id, repo.version, latest.id)
    return fs_mgr.load_seafdir(repo.id, repo.version, head.root_id)
def get_commit(repo, commit_id=None):
    """Load a commit of *repo*.

    Loads *commit_id* when given, otherwise the repo's latest commit.
    Returns the loaded commit, or None when loading fails.

    Fix: the original left ``commit`` unbound when the try-block raised,
    so the final ``return commit`` itself raised UnboundLocalError.
    """
    commit = None
    try:
        if commit_id is not None:
            commit = commit_mgr.load_commit(repo.id, repo.version, commit_id)
        else:
            commits = seafile_api.get_commit_list(repo.id, 0, 1)
            commit = commit_mgr.load_commit(repo.id, repo.version, commits[0].id)
    except Exception as e:
        # TODO: narrow this to the specific load/list errors.
        logger.error('exception: {}'.format(e))
    return commit
def generate_certificate_by_repo(repo):
    """Generate a Cared Data Certificate for *repo*'s latest commit."""
    head_id = seafile_api.get_commit_list(repo.id, 0, 1)[0].id
    head = commit_mgr.load_commit(repo.id, repo.version, head_id)
    return generate_certificate(repo, head)
def diff_2(self):
    """Diff the 'add' commit against the first commit, then the last commit
    against the 'add' commit, and check every reported change category.

    Fix: ``assertEquals`` is a deprecated alias of ``assertEqual``
    (removed in Python 3.12); use ``assertEqual`` consistently.
    """
    commit = commit_mgr.load_commit(self.repo_id_2, self.repo_version, self.add_commit)
    parent = commit_mgr.load_commit(self.repo_id_2, commit.version, self.fst_commit)
    differ = CommitDiffer(self.repo_id_2, commit.version, parent.root_id, commit.root_id,
                          True, True)
    added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()
    added_file_names = ['create_new_file.md', 'create_renamed_file.md',
                        'create_moved_file.md', 'create_deleted_file.md',
                        'create_modified_files.md']
    added_folder_names = ['create_added_folder', 'create_moved_folder',
                          'create_deleted_folder', 'create_renamed_folder']
    all_files_names = [f.path for f in added_files]
    all_folder_names = [f.path for f in added_dirs]
    for f in added_file_names:
        self.assertIn('/' + f, all_files_names)
    for f in added_folder_names:
        self.assertIn('/' + f, all_folder_names)

    # Second diff: last commit vs the 'add' commit.
    commit = commit_mgr.load_commit(self.repo_id_2, self.repo_version, self.lst_commit)
    parent = commit_mgr.load_commit(self.repo_id_2, commit.version, self.add_commit)
    differ = CommitDiffer(self.repo_id_2, commit.version, parent.root_id, commit.root_id,
                          True, True)
    added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()
    self.assertTrue(len(deleted_files) > 0)
    self.assertEqual(deleted_files[0].path, '/create_deleted_file.md')
    self.assertTrue(len(modified_files) > 0)
    self.assertEqual(modified_files[0].path, '/create_modified_files.md')
    self.assertTrue(len(renamed_files) > 0)
    self.assertEqual(renamed_files[0].path, '/create_renamed_file.md')
    self.assertTrue(len(moved_files) > 0)
    self.assertEqual(moved_files[0].path, '/create_moved_file.md')
    self.assertTrue(len(deleted_dirs) > 0)
    self.assertEqual(deleted_dirs[0].path, '/create_deleted_folder')
    self.assertTrue(len(renamed_dirs) > 0)
    self.assertEqual(renamed_dirs[0].path, '/create_renamed_folder')
    self.assertTrue(len(moved_dirs) > 0)
    self.assertEqual(moved_dirs[0].path, '/create_moved_folder')
def test_multi_backend_read_dir(self):
    """Read a directory tree through the multi-backend object stores.

    Fix: ``self.assertTrue(file_b.size, 155067)`` misused assertTrue —
    the second argument is the failure *message*, so the assertion passed
    for any truthy size. Replaced with ``assertEqual``.
    """
    # Skip when the commit manager has no multi-backend support.
    try:
        obj_stores = commit_mgr.obj_stores
    except AttributeError:
        return
    commit = commit_mgr.load_commit(self.repo_id_2, self.repo_version, self.commit_id)
    dir = fs_mgr.load_seafdir(self.repo_id_2, self.repo_version, commit.root_id)
    self.assertEqual(len(dir.get_files_list()), 3)
    self.assertEqual(len(dir.get_subdirs_list()), 2)
    dir_a = dir.lookup('folder1')
    self.assertIsNotNone(dir_a)
    dir_b = dir.lookup('第二个中文目录')
    self.assertIsNotNone(dir_b)
    dir_x = dir.lookup('not.exist')
    self.assertIsNone(dir_x)
    file_a = dir.lookup('a.md')
    self.assertIsNotNone(file_a)
    self.assertEqual(file_a.size, 10)
    content = file_a.get_content()
    self.assertEqual(content, b'hello a.md')
    file_b = dir.lookup('一张照片.jpg')
    self.assertIsNotNone(file_b)
    self.assertEqual(file_b.size, 155067)
    # Test read file more than 1 blocks
    file_c = dir.lookup('glib.zip')
    self.assertIsNotNone(file_c)
    self.assertEqual(file_c.size, 3345765)
    content = file_c.get_content()
    with open(
            os.path.join(
                os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data'),
                'glib.zip'), 'rb') as fp:
        content_r = fp.read()
    self.assertEqual(content, content_r)
    # Test stream read
    stream = file_c.get_stream()
    data = b''
    chunk_size = file_c.size // 5  # 3345765 divides evenly by 5
    for i in range(5):
        data += stream.read(chunk_size)
        self.assertEqual(len(data), (i + 1) * chunk_size)
        self.assertEqual(data, content[:len(data)])
    stream.close()
    self.assertEqual(data, content)
    file_x = dir.lookup('not.exist')
    self.assertIsNone(file_x)
def update_files_index(self, repo_id, old_commit_id, new_commit_id):
    """Incrementally update the search index of *repo_id* by diffing the
    trees of old_commit_id and new_commit_id.

    Order matters: adds are indexed before deletes/updates so the index
    never transiently loses entries it is about to replace.
    """
    # Nothing changed — nothing to index.
    if old_commit_id == new_commit_id:
        return

    old_root = None
    if old_commit_id:
        try:
            old_commit = commit_mgr.load_commit(repo_id, 0, old_commit_id)
            old_root = old_commit.root_id
        except GetObjectError as e:
            # A missing old commit is tolerable: diff against None falls
            # back to a full (re-)index of the new tree.
            logger.debug(e)
            old_root = None

    try:
        new_commit = commit_mgr.load_commit(repo_id, 0, new_commit_id)
    except GetObjectError as e:
        # new commit should exists in the obj store
        logger.warning(e)
        return

    new_root = new_commit.root_id
    version = new_commit.get_version()
    self.files_index.update_repo_name_index(repo_id, version, new_root)

    # Identical roots mean the content did not change (e.g. rename-only
    # commit metadata) — file entries need no update.
    if old_root == new_root:
        return

    differ = CommitDiffer(repo_id, version, old_root, new_root)
    added_files, deleted_files, added_dirs, deleted_dirs, modified_files = differ.diff(
        new_commit.ctime)

    # if inrecovery:
    #     added_files = filter(lambda x: not es_check_exist(es, repo_id, x), added_files)

    # total_changed = sum(map(len, [added_files, deleted_files, deleted_dirs, modified_files]))
    # if total_changed > 10000:
    #     logger.warning('skip large changeset: %s files(%s)', total_changed, repo_id)
    #     return

    self.files_index.add_files(repo_id, version, added_files)
    self.files_index.delete_files(repo_id, deleted_files)
    self.files_index.add_dirs(repo_id, version, added_dirs)
    self.files_index.delete_dirs(repo_id, deleted_dirs)
    self.files_index.update_files(repo_id, version, modified_files)
def test_multi_backend_read_dir(self):
    """Read a directory tree through the multi-backend object stores.

    Fixes (Python 2 leftovers, aligned with the Python 3 variant of this
    test elsewhere in the file): deprecated ``assertEquals``; ``xrange``;
    true division for the chunk size; ``str`` vs ``bytes`` comparisons
    (``get_content()`` returns bytes on Python 3); ``assertTrue`` misused
    as an equality check for file_b.size.
    """
    # Skip when the commit manager has no multi-backend support.
    try:
        obj_stores = commit_mgr.obj_stores
    except AttributeError:
        return
    commit = commit_mgr.load_commit(self.repo_id_2, self.repo_version, self.commit_id)
    dir = fs_mgr.load_seafdir(self.repo_id_2, self.repo_version, commit.root_id)
    self.assertEqual(len(dir.get_files_list()), 3)
    self.assertEqual(len(dir.get_subdirs_list()), 2)
    dir_a = dir.lookup('folder1')
    self.assertIsNotNone(dir_a)
    dir_b = dir.lookup('第二个中文目录')
    self.assertIsNotNone(dir_b)
    dir_x = dir.lookup('not.exist')
    self.assertIsNone(dir_x)
    file_a = dir.lookup('a.md')
    self.assertIsNotNone(file_a)
    self.assertEqual(file_a.size, 10)
    content = file_a.get_content()
    self.assertEqual(content, b'hello a.md')
    file_b = dir.lookup('一张照片.jpg')
    self.assertIsNotNone(file_b)
    self.assertEqual(file_b.size, 155067)
    # Test read file more than 1 blocks
    file_c = dir.lookup('glib.zip')
    self.assertIsNotNone(file_c)
    self.assertEqual(file_c.size, 3345765)
    content = file_c.get_content()
    with open(os.path.join(os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                        'data'), 'glib.zip'), 'rb') as fp:
        content_r = fp.read()
    self.assertEqual(content, content_r)
    # Test stream read
    stream = file_c.get_stream()
    data = b''
    chunk_size = file_c.size // 5  # 3345765 divides evenly by 5
    for i in range(5):
        data += stream.read(chunk_size)
        self.assertEqual(len(data), (i + 1) * chunk_size)
        self.assertEqual(data, content[:len(data)])
    stream.close()
    self.assertEqual(data, content)
    file_x = dir.lookup('not.exist')
    self.assertIsNone(file_x)
def test_read_dir(self):
    """Read a known directory tree and verify entries, content, streaming.

    Fixes (Python 2 leftovers): deprecated ``assertEquals``; ``xrange``;
    true division for the chunk size; ``str`` vs ``bytes`` comparisons
    (``get_content()`` returns bytes on Python 3); ``assertTrue`` misused
    as an equality check for file_b.size.
    """
    commit = commit_mgr.load_commit(self.repo_id, self.repo_version, self.commit_id)
    dir = fs_mgr.load_seafdir(self.repo_id, self.repo_version, commit.root_id)
    import pprint
    pprint.pprint(dir.dirents)
    self.assertEqual(len(dir.get_files_list()), 3)
    self.assertEqual(len(dir.get_subdirs_list()), 2)
    dir_a = dir.lookup('folder1')
    self.assertIsNotNone(dir_a)
    dir_b = dir.lookup('第二个中文目录')
    self.assertIsNotNone(dir_b)
    dir_x = dir.lookup('not.exist')
    self.assertIsNone(dir_x)
    file_a = dir.lookup('a.md')
    self.assertIsNotNone(file_a)
    self.assertEqual(file_a.size, 10)
    content = file_a.get_content()
    self.assertEqual(content, b'hello a.md')
    file_b = dir.lookup('一张照片.jpg')
    self.assertIsNotNone(file_b)
    self.assertEqual(file_b.size, 155067)
    # Test read file more than 1 blocks
    file_c = dir.lookup('glib.zip')
    self.assertIsNotNone(file_c)
    self.assertEqual(file_c.size, 3345765)
    content = file_c.get_content()
    with open(os.path.join(data_dir, 'glib.zip'), 'rb') as fp:
        content_r = fp.read()
    self.assertEqual(content, content_r)
    # Test stream read
    stream = file_c.get_stream()
    data = b''
    chunk_size = file_c.size // 5  # 3345765 divides evenly by 5
    for i in range(5):
        data += stream.read(chunk_size)
        self.assertEqual(len(data), (i + 1) * chunk_size)
        self.assertEqual(data, content[:len(data)])
    stream.close()
    self.assertEqual(data, content)
    file_x = dir.lookup('not.exist')
    self.assertIsNone(file_x)
def test_read_dir(self):
    """Read a known directory tree and verify entries, content, streaming.

    Fixes (Python 2 leftovers): deprecated ``assertEquals``; ``xrange``;
    true division for the chunk size; ``str`` vs ``bytes`` comparisons
    (``get_content()`` returns bytes on Python 3); ``assertTrue`` misused
    as an equality check for file_b.size.
    """
    commit = commit_mgr.load_commit(self.repo_id, self.repo_version, self.commit_id)
    dir = fs_mgr.load_seafdir(self.repo_id, self.repo_version, commit.root_id)
    import pprint
    pprint.pprint(dir.dirents)
    self.assertEqual(len(dir.get_files_list()), 3)
    self.assertEqual(len(dir.get_subdirs_list()), 2)
    dir_a = dir.lookup('folder1')
    self.assertIsNotNone(dir_a)
    dir_b = dir.lookup('第二个中文目录')
    self.assertIsNotNone(dir_b)
    dir_x = dir.lookup('not.exist')
    self.assertIsNone(dir_x)
    file_a = dir.lookup('a.md')
    self.assertIsNotNone(file_a)
    self.assertEqual(file_a.size, 10)
    content = file_a.get_content()
    self.assertEqual(content, b'hello a.md')
    file_b = dir.lookup('一张照片.jpg')
    self.assertIsNotNone(file_b)
    self.assertEqual(file_b.size, 155067)
    # Test read file more than 1 blocks
    file_c = dir.lookup('glib.zip')
    self.assertIsNotNone(file_c)
    self.assertEqual(file_c.size, 3345765)
    content = file_c.get_content()
    with open(os.path.join(data_dir, 'glib.zip'), 'rb') as fp:
        content_r = fp.read()
    self.assertEqual(content, content_r)
    # Test stream read
    stream = file_c.get_stream()
    data = b''
    chunk_size = file_c.size // 5  # 3345765 divides evenly by 5
    for i in range(5):
        data += stream.read(chunk_size)
        self.assertEqual(len(data), (i + 1) * chunk_size)
        self.assertEqual(data, content[:len(data)])
    stream.close()
    self.assertEqual(data, content)
    file_x = dir.lookup('not.exist')
    self.assertIsNone(file_x)
def test_diff_fold_dirs(self):
    """Diff against the empty root with and without directory folding.

    Fix: the ``assertTrue(a == b)`` length check plus per-index loops gave
    no diagnostics on failure; ``assertEqual`` on the full lists checks
    length and element order and reports a useful diff.
    """
    commit = commit_mgr.load_commit(self.repo_id3, self.repo_version, self.commit_id3)

    # fold dirs: nested additions collapse into the top-level entries.
    differ = CommitDiffer(self.repo_id3, commit.version,
                          '0000000000000000000000000000000000000000',
                          commit.root_id, True, True)
    added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()
    self.assertEqual(['/file1'], [f.path for f in added_files])
    self.assertEqual(['/folder1'], [d.path for d in added_dirs])

    # don't fold dirs: every nested file and folder is reported.
    differ = CommitDiffer(self.repo_id3, commit.version,
                          '0000000000000000000000000000000000000000',
                          commit.root_id, True, False)
    added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()
    self.assertEqual(['/file1', '/folder1/folder2/file3'],
                     [f.path for f in added_files])
    self.assertEqual(['/folder1', '/folder1/folder2', '/folder1/folder2/foder3'],
                     [d.path for d in added_dirs])
def get_blocks(repo_id, fname, commit_id=None):
    """Print the block IDs of file *fname* for a repo and commit.

    repo_id: repo id
    fname: file name to look up in the commit's root directory
    commit_id: commit id (defaults to the repo's latest commit)

    Fixes: Python 2 ``print`` statements (a syntax error under Python 3)
    converted to ``print()`` calls; locals renamed to stop shadowing the
    builtins ``dir`` and ``file``.
    """
    repo = get_repo(repo_id)
    commits = seafile_api.get_commit_list(repo.id, 0, MAX_INT)
    print("commits:", [(c.id, c.ctime) for c in commits])
    commit_id = commit_id if commit_id else commits[0].id
    commit = commit_mgr.load_commit(repo.id, repo.version, commit_id)
    root = fs_mgr.load_seafdir(repo.id, repo.version, commit.root_id)
    target = root.lookup(fname)
    print("File: %s, commit id: %s, root_id: %s" % (fname, commit_id, commit.root_id))
    if target:
        print("blocks: ", target.blocks)
    else:
        print("No file for this commit!")
def test_diff_fold_dirs(self):
    """Diff against the empty root with and without directory folding.

    Fix: the ``assertTrue(a == b)`` length check plus per-index loops gave
    no diagnostics on failure; ``assertEqual`` on the full lists checks
    length and element order and reports a useful diff.
    """
    commit = commit_mgr.load_commit(self.repo_id3, self.repo_version, self.commit_id3)

    # fold dirs: nested additions collapse into the top-level entries.
    differ = CommitDiffer(self.repo_id3, commit.version,
                          '0000000000000000000000000000000000000000',
                          commit.root_id, True, True)
    added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()
    self.assertEqual(['/file1'], [f.path for f in added_files])
    self.assertEqual(['/folder1'], [d.path for d in added_dirs])

    # don't fold dirs: every nested file and folder is reported.
    differ = CommitDiffer(self.repo_id3, commit.version,
                          '0000000000000000000000000000000000000000',
                          commit.root_id, True, False)
    added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()
    self.assertEqual(['/file1', '/folder1/folder2/file3'],
                     [f.path for f in added_files])
    self.assertEqual(['/folder1', '/folder1/folder2', '/folder1/folder2/foder3'],
                     [d.path for d in added_dirs])
def RepoUpdateEventHandler(session, msg):
    """Handle a repo-update event: diff the new commit against its parent,
    sync path-based DB records, and store file-history/activity rows.

    NOTE(review): reconstructed from whitespace-mangled source; verify the
    indentation of the KEEPER section against the original file.
    """
    # Message payload: "<type>\t<repo_id>\t<commit_id>".
    elements = msg['content'].split('\t')
    if len(elements) != 3:
        logging.warning("got bad message: %s", elements)
        return

    repo_id = elements[1]
    commit_id = elements[2]

    # Commits may be stored under version 1 or the legacy version 0.
    commit = commit_mgr.load_commit(repo_id, 1, commit_id)
    if commit is None:
        commit = commit_mgr.load_commit(repo_id, 0, commit_id)

    # TODO: maybe handle merge commit.
    if commit is not None and commit.parent_id and not commit.second_parent_id:
        parent = commit_mgr.load_commit(repo_id, commit.version, commit.parent_id)
        if parent is not None:
            differ = CommitDiffer(repo_id, commit.version, parent.root_id, commit.root_id,
                                  True, True)
            added_files, deleted_files, added_dirs, deleted_dirs, modified_files,\
                renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()

            # Keep path-keyed DB records in sync with renames/moves
            # (0 = file record, 1 = directory record).
            if renamed_files or renamed_dirs or moved_files or moved_dirs:
                changer = ChangeFilePathHandler()
                for r_file in renamed_files:
                    changer.update_db_records(repo_id, r_file.path, r_file.new_path, 0)
                for r_dir in renamed_dirs:
                    changer.update_db_records(repo_id, r_dir.path, r_dir.new_path, 1)
                for m_file in moved_files:
                    changer.update_db_records(repo_id, m_file.path, m_file.new_path, 0)
                for m_dir in moved_dirs:
                    changer.update_db_records(repo_id, m_dir.path, m_dir.new_path, 1)
                changer.close_session()

            # Audience for the activity records: sharees plus the owner.
            users = []
            org_id = get_org_id_by_repo_id(repo_id)
            if org_id > 0:
                users = seafile_api.org_get_shared_users_by_repo(org_id, repo_id)
                owner = seafile_api.get_org_repo_owner(repo_id)
            else:
                users = seafile_api.get_shared_users_by_repo(repo_id)
                owner = seafile_api.get_repo_owner(repo_id)
            if owner not in users:
                users = users + [owner]
            if not users:
                return

            time = datetime.datetime.utcfromtimestamp(msg['ctime'])
            if added_files or deleted_files or added_dirs or deleted_dirs or \
                    modified_files or renamed_files or moved_files or renamed_dirs or moved_dirs:
                if appconfig.fh.enabled:
                    records = generate_filehistory_records(added_files, deleted_files,
                            added_dirs, deleted_dirs, modified_files, renamed_files,
                            moved_files, renamed_dirs, moved_dirs, commit, repo_id,
                            parent, time)
                    save_file_histories(session, records)
                records = generate_activity_records(added_files, deleted_files, added_dirs,
                        deleted_dirs, modified_files, renamed_files, moved_files,
                        renamed_dirs, moved_dirs, commit, repo_id, parent, users, time)
                save_user_activities(session, records)
            else:
                # No content change: the commit only renamed the repo.
                save_repo_rename_activity(session, commit, repo_id, parent, org_id,
                                          users, time)

    # TODO check: catalog entry update
    # KEEPER
    logging.info("REPO UPDATED EVENT repo_id: %s" % repo_id)
    logging.info("Trying to create/update keeper catalog entry for repo_id: %s..." % repo_id)
    if bool(generate_catalog_entry_by_repo_id(repo_id)):
        logging.info("Success!")
    else:
        logging.error("Something went wrong...")

    if appconfig.enable_collab_server:
        send_message_to_collab_server(repo_id)
def RepoUpdateEventHandler(session, msg):
    """Handle a repo-update event (legacy variant): diff the new commit
    against its parent and store file-history/activity rows.

    NOTE(review): reconstructed from whitespace-mangled source; the
    ``records = generate_filehistory_records(...)`` statement was split
    across lines in the mangled input — verify against the original file.
    """
    # Message payload: "<type>\t<repo_id>\t<commit_id>".
    elements = msg.body.split('\t')
    if len(elements) != 3:
        logging.warning("got bad message: %s", elements)
        return

    repo_id = elements[1]
    commit_id = elements[2]
    # NOTE(review): str.decode() exists only on Python 2 byte strings;
    # under Python 3 these branches would raise AttributeError — this
    # appears to be Python-2-era code. Confirm the target interpreter.
    if isinstance(repo_id, str):
        repo_id = repo_id.decode('utf8')
    if isinstance(commit_id, str):
        commit_id = commit_id.decode('utf8')

    # Commits may be stored under version 1 or the legacy version 0.
    commit = commit_mgr.load_commit(repo_id, 1, commit_id)
    if commit is None:
        commit = commit_mgr.load_commit(repo_id, 0, commit_id)

    # TODO: maybe handle merge commit.
    if commit is not None and commit.parent_id and not commit.second_parent_id:
        parent = commit_mgr.load_commit(repo_id, commit.version, commit.parent_id)
        if parent is not None:
            differ = CommitDiffer(repo_id, commit.version, parent.root_id, commit.root_id,
                                  True, True)
            added_files, deleted_files, added_dirs, deleted_dirs, modified_files,\
                renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff_to_unicode()

            # Keep path-keyed DB records in sync with renames/moves
            # (0 = file record, 1 = directory record).
            if renamed_files or renamed_dirs or moved_files or moved_dirs:
                changer = ChangeFilePathHandler()
                for r_file in renamed_files:
                    changer.update_db_records(repo_id, r_file.path, r_file.new_path, 0)
                for r_dir in renamed_dirs:
                    changer.update_db_records(repo_id, r_dir.path, r_dir.new_path, 1)
                for m_file in moved_files:
                    changer.update_db_records(repo_id, m_file.path, m_file.new_path, 0)
                for m_dir in moved_dirs:
                    changer.update_db_records(repo_id, m_dir.path, m_dir.new_path, 1)
                changer.close_session()

            # Audience for the activity records: sharees plus the owner.
            users = []
            org_id = get_org_id_by_repo_id(repo_id)
            if org_id > 0:
                users = seafile_api.org_get_shared_users_by_repo(org_id, repo_id)
                owner = seafile_api.get_org_repo_owner(repo_id)
            else:
                users = seafile_api.get_shared_users_by_repo(repo_id)
                owner = seafile_api.get_repo_owner(repo_id)
            if owner not in users:
                users = users + [owner]
            if not users:
                return

            time = datetime.datetime.utcfromtimestamp(msg.ctime)
            if added_files or deleted_files or added_dirs or deleted_dirs or \
                    modified_files or renamed_files or moved_files or renamed_dirs or moved_dirs:
                if appconfig.fh.enabled:
                    records = generate_filehistory_records(added_files, deleted_files,
                            added_dirs, deleted_dirs, modified_files, renamed_files,
                            moved_files, renamed_dirs, moved_dirs, commit, repo_id,
                            parent, time)
                    save_file_histories(session, records)
                records = generate_activity_records(added_files, deleted_files, added_dirs,
                        deleted_dirs, modified_files, renamed_files, moved_files,
                        renamed_dirs, moved_dirs, commit, repo_id, parent, users, time)
                save_user_activities(session, records)
            else:
                # No content change: the commit only renamed the repo.
                save_repo_rename_activity(session, commit, repo_id, parent, org_id,
                                          users, time)
def diff_and_scan_content(self, task, client):
    """Diff a repo between two commits and content-scan the changed files.

    Renames/moves are propagated to existing scan results, deleted and
    modified paths are purged, then added + modified files are scanned via
    the third-party *client* and new results stored.

    NOTE(review): reconstructed from whitespace-mangled source; verify
    block boundaries (especially the commit/close ordering) against the
    original file.
    """
    repo_id = task.repo_id
    last_commit_id = task.last_commit_id
    new_commit_id = task.new_commit_id
    edb_session = appconfig.session_cls()

    # repo not changed, update timestamp
    if last_commit_id == new_commit_id:
        q = edb_session.query(ContentScanRecord)
        q = q.filter(ContentScanRecord.repo_id==repo_id,
                     ContentScanRecord.commit_id==last_commit_id)
        q.update({"timestamp": self.dt})
        edb_session.commit()
        edb_session.close()
        return

    # diff — commits may be stored under version 1 or the legacy version 0.
    version = 1
    new_commit = commit_mgr.load_commit(repo_id, version, new_commit_id)
    if new_commit is None:
        version = 0
        new_commit = commit_mgr.load_commit(repo_id, version, new_commit_id)
    if not new_commit:
        logging.warning('Failed to load commit %s/%s', repo_id, new_commit_id)
        edb_session.close()
        return
    last_commit = None
    if last_commit_id:
        last_commit = commit_mgr.load_commit(repo_id, version, last_commit_id)
        if not last_commit:
            logging.warning('Failed to load commit %s/%s', repo_id, last_commit_id)
            edb_session.close()
            return
    new_root_id = new_commit.root_id
    # No previous scan: diff against the empty root, i.e. scan everything.
    last_root_id = last_commit.root_id if last_commit else ZERO_OBJ_ID

    differ = CommitDiffer(repo_id, version, last_root_id, new_root_id, True, False)
    added_files, deleted_files, added_dirs, deleted_dirs, modified_files,\
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff_to_unicode()

    # Handle renamed, moved and deleted files.
    q = edb_session.query(ContentScanResult).filter(ContentScanResult.repo_id==repo_id)
    results = q.all()
    if results:
        path_pairs_to_rename = []
        paths_to_delete = []
        # renamed dirs: rewrite the prefix of every result under the dir.
        for r_dir in renamed_dirs:
            r_path = r_dir.path + '/'
            l = len(r_path)
            for row in results:
                if r_path == row.path[:l]:
                    new_path = r_dir.new_path + '/' + row.path[l:]
                    path_pairs_to_rename.append((row.path, new_path))
        # moved dirs
        for m_dir in moved_dirs:
            m_path = m_dir.path + '/'
            l = len(m_path)
            for row in results:
                if m_path == row.path[:l]:
                    new_path = m_dir.new_path + '/' + row.path[l:]
                    path_pairs_to_rename.append((row.path, new_path))
        # renamed files: exact path match only.
        for r_file in renamed_files:
            r_path = r_file.path
            for row in results:
                if r_path == row.path:
                    new_path = r_file.new_path
                    path_pairs_to_rename.append((row.path, new_path))
        # moved files
        for m_file in moved_files:
            m_path = m_file.path
            for row in results:
                if m_path == row.path:
                    new_path = m_file.new_path
                    path_pairs_to_rename.append((row.path, new_path))

        for old_path, new_path in path_pairs_to_rename:
            q = edb_session.query(ContentScanResult)
            q = q.filter(ContentScanResult.repo_id==repo_id,
                         ContentScanResult.path==old_path)
            q = q.update({"path": new_path})

        # deleted files
        for d_file in deleted_files:
            d_path = d_file.path
            for row in results:
                if d_path == row.path:
                    paths_to_delete.append(row.path)
        # We will scan modified_files and re-record later,
        # so delete previous records now
        for m_file in modified_files:
            m_path = m_file.path
            for row in results:
                if m_path == row.path:
                    paths_to_delete.append(row.path)

        for path in paths_to_delete:
            q = edb_session.query(ContentScanResult)
            q = q.filter(ContentScanResult.repo_id==repo_id,
                         ContentScanResult.path==path)
            q.delete()
        edb_session.commit()

    # scan added_files and modified_files by third-party API.
    files_to_scan = []
    files_to_scan.extend(added_files)
    files_to_scan.extend(modified_files)
    a_count = 0
    scan_results = []
    for f in files_to_scan:
        # Size/extension filter — skip files that should not be scanned.
        if not self.should_scan_file(f.path, f.size):
            continue
        seafile_obj = fs_mgr.load_seafile(repo_id, 1, f.obj_id)
        content = seafile_obj.get_content()
        if not content:
            continue
        result = client.scan(content)
        if result and isinstance(result, dict):
            item = {"path": f.path, "detail": result}
            scan_results.append(item)
        else:
            logging.warning('Failed to scan %s:%s', repo_id, f.path)

    for item in scan_results:
        detail = json.dumps(item["detail"])
        new_record = ContentScanResult(repo_id, item["path"], appconfig.platform, detail)
        edb_session.add(new_record)
        a_count += 1
    if a_count >= 1:
        logging.info('Found %d new illegal files.', a_count)

    # Update ContentScanRecord: advance the per-repo scan cursor.
    if last_commit_id:
        q = edb_session.query(ContentScanRecord).filter(ContentScanRecord.repo_id==repo_id)
        q.update({"commit_id": new_commit_id, "timestamp": self.dt})
    else:
        new_record = ContentScanRecord(repo_id, new_commit_id, self.dt)
        edb_session.add(new_record)

    edb_session.commit()
    edb_session.close()
def get_latest_commit_root_id(repo):
    """Return the root fs object ID of *repo*'s most recent commit."""
    newest = seafile_api.get_commit_list(repo.id, 0, 1)[0]
    head = commit_mgr.load_commit(repo.id, repo.version, newest.id)
    return head.root_id