def diff_and_update(self, repo_id, commit_id, org_id, users):
    """Diff one commit against its parent and store file-history records.

    The commit ``commit_id`` of ``repo_id`` is loaded with version 1 and,
    if that yields nothing, with version 0. Commits that have no parent and
    commits that have a ``second_parent_id`` are skipped. Otherwise the
    commit tree is diffed against the parent tree; when the diff is not
    empty, file-history records are generated and, if
    ``appconfig.fh.enabled`` is set, saved through a session created from
    ``self._db_session_class``.

    ``org_id`` and ``users`` are accepted but not used by this method.
    """
    # Some properties of the seafile commit object are named differently
    # from those of the seaobj object, so the commit is loaded from seaobj
    # again.
    commit = commit_mgr.load_commit(repo_id, 1, commit_id)
    if commit is None:
        commit = commit_mgr.load_commit(repo_id, 0, commit_id)

    if commit is not None and commit.parent_id and not commit.second_parent_id:
        parent = commit_mgr.load_commit(repo_id, commit.version, commit.parent_id)
        if parent is not None:
            differ = CommitDiffer(repo_id, commit.version,
                                  parent.root_id, commit.root_id, True, True)
            added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
                renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff_to_unicode()

            # Named commit_time so the local does not shadow the ``time`` module.
            commit_time = datetime.datetime.utcfromtimestamp(commit.ctime)

            session = scoped_session(self._db_session_class)
            try:
                if added_files or deleted_files or added_dirs or deleted_dirs or \
                        modified_files or renamed_files or moved_files or \
                        renamed_dirs or moved_dirs:
                    records = generate_filehistory_records(
                        added_files, deleted_files, added_dirs, deleted_dirs,
                        modified_files, renamed_files, moved_files,
                        renamed_dirs, moved_dirs,
                        commit, repo_id, parent, commit_time)
                    with mock.patch('seafevents.events.handlers.save_filehistory',
                                    side_effect=save_filehistory):
                        if appconfig.fh.enabled:
                            save_file_histories(session, records)
            finally:
                # Release the session even if record generation or saving fails.
                session.close()

    # NOTE(review): the original layout was collapsed onto one line; the
    # increment is assumed to sit at function level (run for every commit
    # processed without an exception) -- confirm against the upstream file.
    self._current_commit_position += 1
def diff_2(self):
    """Check CommitDiffer output for the second test repository.

    Two diffs are examined:

    * ``self.fst_commit`` -> ``self.add_commit``: every expected file and
      folder name must be reported as added.
    * ``self.add_commit`` -> ``self.lst_commit``: the first entry of each
      deleted / modified / renamed / moved list must carry the expected
      path.

    Uses ``assertEqual`` throughout; the ``assertEquals`` alias is
    deprecated and was removed in Python 3.12.
    """
    commit = commit_mgr.load_commit(self.repo_id_2, self.repo_version, self.add_commit)
    parent = commit_mgr.load_commit(self.repo_id_2, commit.version, self.fst_commit)
    differ = CommitDiffer(self.repo_id_2, commit.version,
                          parent.root_id, commit.root_id, True, True)
    added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()

    added_file_names = [
        'create_new_file.md',
        'create_renamed_file.md',
        'create_moved_file.md',
        'create_deleted_file.md',
        'create_modified_files.md',
    ]
    added_folder_names = [
        'create_added_folder',
        'create_moved_folder',
        'create_deleted_folder',
        'create_renamed_folder',
    ]

    all_files_names = [f.path for f in added_files]
    all_folder_names = [f.path for f in added_dirs]
    # Reported paths are rooted at '/', the expected names are bare.
    for f in added_file_names:
        self.assertIn('/' + f, all_files_names)
    for f in added_folder_names:
        self.assertIn('/' + f, all_folder_names)

    commit = commit_mgr.load_commit(self.repo_id_2, self.repo_version, self.lst_commit)
    parent = commit_mgr.load_commit(self.repo_id_2, commit.version, self.add_commit)
    differ = CommitDiffer(self.repo_id_2, commit.version,
                          parent.root_id, commit.root_id, True, True)
    added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()

    self.assertTrue(len(deleted_files) > 0)
    self.assertEqual(deleted_files[0].path, '/create_deleted_file.md')
    self.assertTrue(len(modified_files) > 0)
    self.assertEqual(modified_files[0].path, '/create_modified_files.md')
    self.assertTrue(len(renamed_files) > 0)
    self.assertEqual(renamed_files[0].path, '/create_renamed_file.md')
    self.assertTrue(len(moved_files) > 0)
    self.assertEqual(moved_files[0].path, '/create_moved_file.md')
    self.assertTrue(len(deleted_dirs) > 0)
    self.assertEqual(deleted_dirs[0].path, '/create_deleted_folder')
    self.assertTrue(len(renamed_dirs) > 0)
    self.assertEqual(renamed_dirs[0].path, '/create_renamed_folder')
    self.assertTrue(len(moved_dirs) > 0)
    self.assertEqual(moved_dirs[0].path, '/create_moved_folder')
def test_diff_fold_dirs(self):
    """Diff the third test repo against the empty tree, folded and unfolded.

    With the last CommitDiffer flag set to True only the top-level file
    and folder are expected as additions; with it set to False every
    nested entry is expected.
    """
    head = commit_mgr.load_commit(self.repo_id3, self.repo_version, self.commit_id3)
    empty_root = '0000000000000000000000000000000000000000'

    def verify_added(fold_dirs, expected_files, expected_dirs):
        # Run one diff from the empty tree and compare the added paths
        # with the expected ones, position by position.
        differ = CommitDiffer(self.repo_id3, head.version,
                              empty_root, head.root_id, True, fold_dirs)
        new_files, _del_files, new_dirs, _del_dirs, _mod_files, \
            _ren_files, _mov_files, _ren_dirs, _mov_dirs = differ.diff()

        actual_files = [entry.path for entry in new_files]
        actual_dirs = [entry.path for entry in new_dirs]

        self.assertTrue(len(expected_files) == len(actual_files))
        self.assertTrue(len(expected_dirs) == len(actual_dirs))
        for want, got in zip(expected_files, actual_files):
            self.assertTrue(want == got)
        for want, got in zip(expected_dirs, actual_dirs):
            self.assertTrue(want == got)

    # fold dirs
    verify_added(True, ['/file1'], ['/folder1'])

    # don't fold dirs
    verify_added(
        False,
        ['/file1', '/folder1/folder2/file3'],
        ['/folder1', '/folder1/folder2', '/folder1/folder2/foder3'],
    )
def diff_2(self):
    """Verify the two diffs of the second test repository.

    First the additions between ``self.fst_commit`` and
    ``self.add_commit`` are checked by name, then the first entry of each
    change list between ``self.add_commit`` and ``self.lst_commit`` is
    checked by path.
    """
    def changes_between(newer_id, older_id):
        # Load both commits and return the nine change lists of their diff.
        newer = commit_mgr.load_commit(self.repo_id_2, self.repo_version, newer_id)
        older = commit_mgr.load_commit(self.repo_id_2, newer.version, older_id)
        return CommitDiffer(self.repo_id_2, newer.version,
                            older.root_id, newer.root_id, True, True).diff()

    # --- first commit -> "add" commit: everything shows up as added ---
    new_files, gone_files, new_dirs, gone_dirs, changed_files, \
        ren_files, mov_files, ren_dirs, mov_dirs = \
        changes_between(self.add_commit, self.fst_commit)

    expected_files = [
        'create_new_file.md',
        'create_renamed_file.md',
        'create_moved_file.md',
        'create_deleted_file.md',
        'create_modified_files.md',
    ]
    expected_folders = [
        'create_added_folder',
        'create_moved_folder',
        'create_deleted_folder',
        'create_renamed_folder',
    ]

    reported_files = [item.path for item in new_files]
    reported_folders = [item.path for item in new_dirs]
    for name in expected_files:
        self.assertIn('/' + name, reported_files)
    for name in expected_folders:
        self.assertIn('/' + name, reported_folders)

    # --- "add" commit -> last commit: one change of every kind ---
    new_files, gone_files, new_dirs, gone_dirs, changed_files, \
        ren_files, mov_files, ren_dirs, mov_dirs = \
        changes_between(self.lst_commit, self.add_commit)

    expectations = (
        (gone_files, '/create_deleted_file.md'),
        (changed_files, '/create_modified_files.md'),
        (ren_files, '/create_renamed_file.md'),
        (mov_files, '/create_moved_file.md'),
        (gone_dirs, '/create_deleted_folder'),
        (ren_dirs, '/create_renamed_folder'),
        (mov_dirs, '/create_moved_folder'),
    )
    for entries, first_path in expectations:
        self.assertTrue(len(entries) > 0)
        self.assertEqual(entries[0].path, first_path)
def test_diff_fold_dirs(self):
    """Compare added paths of the third test repo with and without folding.

    Each scenario diffs the commit's tree against the all-zero root id and
    checks the added files and directories element by element.
    """
    target = commit_mgr.load_commit(self.repo_id3, self.repo_version, self.commit_id3)

    # (last CommitDiffer flag, expected added files, expected added dirs)
    scenarios = (
        # fold dirs
        (True,
         ['/file1'],
         ['/folder1']),
        # don't fold dirs
        (False,
         ['/file1', '/folder1/folder2/file3'],
         ['/folder1', '/folder1/folder2', '/folder1/folder2/foder3']),
    )

    for fold_flag, want_files, want_dirs in scenarios:
        differ = CommitDiffer(self.repo_id3, target.version,
                              '0000000000000000000000000000000000000000',
                              target.root_id, True, fold_flag)
        added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
            renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()

        got_files = [node.path for node in added_files]
        got_dirs = [node.path for node in added_dirs]

        self.assertTrue(len(want_files) == len(got_files))
        self.assertTrue(len(want_dirs) == len(got_dirs))

        for pos, expected_path in enumerate(want_files):
            self.assertTrue(expected_path == got_files[pos])
        for pos, expected_path in enumerate(want_dirs):
            self.assertTrue(expected_path == got_dirs[pos])
def RepoUpdateEventHandler(session, msg):
    """Handle a repo-update message: fix stored paths and record activity.

    ``msg.body`` must consist of three tab-separated fields; the second is
    the repo id and the third the commit id. Malformed messages are logged
    and ignored. Commits without a parent, merge commits and commits whose
    parent cannot be loaded are ignored as well.

    For a plain commit the handler:

    1. diffs the commit against its parent,
    2. rewrites stored paths for renamed / moved files and directories via
       ``ChangeFilePathHandler``,
    3. collects the users the repo is shared with plus its owner and
       returns early when that list is empty,
    4. saves file-history records (only if ``appconfig.fh.enabled``) and
       user-activity records when the diff is non-empty, or a repo-rename
       activity when the diff is empty.

    :param session: database session handed to the ``save_*`` helpers.
    :param msg: message object exposing ``body`` and ``ctime``.
    """
    elements = msg.body.split('\t')
    if len(elements) != 3:
        logging.warning("got bad message: %s", elements)
        return

    repo_id = elements[1]
    commit_id = elements[2]
    # ``bytes`` is ``str`` on Python 2, so this decodes exactly what the old
    # ``isinstance(x, str)`` check decoded there, while on Python 3 text
    # strings (which have no ``decode``) are left untouched.
    if isinstance(repo_id, bytes):
        repo_id = repo_id.decode('utf8')
    if isinstance(commit_id, bytes):
        commit_id = commit_id.decode('utf8')

    commit = commit_mgr.load_commit(repo_id, 1, commit_id)
    if commit is None:
        commit = commit_mgr.load_commit(repo_id, 0, commit_id)

    # TODO: maybe handle merge commit.
    if commit is None or not commit.parent_id or commit.second_parent_id:
        return

    parent = commit_mgr.load_commit(repo_id, commit.version, commit.parent_id)
    if parent is None:
        return

    differ = CommitDiffer(repo_id, commit.version,
                          parent.root_id, commit.root_id, True, True)
    added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff_to_unicode()

    if renamed_files or renamed_dirs or moved_files or moved_dirs:
        changer = ChangeFilePathHandler()
        try:
            # The last argument is 0 for file entries and 1 for directories.
            for r_file in renamed_files:
                changer.update_db_records(repo_id, r_file.path, r_file.new_path, 0)
            for r_dir in renamed_dirs:
                changer.update_db_records(repo_id, r_dir.path, r_dir.new_path, 1)
            for m_file in moved_files:
                changer.update_db_records(repo_id, m_file.path, m_file.new_path, 0)
            for m_dir in moved_dirs:
                changer.update_db_records(repo_id, m_dir.path, m_dir.new_path, 1)
        finally:
            # Do not leak the handler's session when an update fails.
            changer.close_session()

    org_id = get_org_id_by_repo_id(repo_id)
    if org_id > 0:
        users = seafile_api.org_get_shared_users_by_repo(org_id, repo_id)
        owner = seafile_api.get_org_repo_owner(repo_id)
    else:
        users = seafile_api.get_shared_users_by_repo(repo_id)
        owner = seafile_api.get_repo_owner(repo_id)

    if owner not in users:
        users = users + [owner]
    if not users:
        return

    # Named event_time so the local does not shadow the ``time`` module.
    event_time = datetime.datetime.utcfromtimestamp(msg.ctime)
    if added_files or deleted_files or added_dirs or deleted_dirs or \
            modified_files or renamed_files or moved_files or \
            renamed_dirs or moved_dirs:
        if appconfig.fh.enabled:
            records = generate_filehistory_records(
                added_files, deleted_files, added_dirs, deleted_dirs,
                modified_files, renamed_files, moved_files,
                renamed_dirs, moved_dirs,
                commit, repo_id, parent, event_time)
            save_file_histories(session, records)

        records = generate_activity_records(
            added_files, deleted_files, added_dirs, deleted_dirs,
            modified_files, renamed_files, moved_files,
            renamed_dirs, moved_dirs,
            commit, repo_id, parent, users, event_time)
        save_user_activities(session, records)
    else:
        # An empty diff is recorded as a repo-rename activity.
        save_repo_rename_activity(session, commit, repo_id, parent,
                                  org_id, users, event_time)
def RepoUpdateEventHandler(session, msg):
    """Handle a repo-update message (dict-style message, Keeper variant).

    ``msg['content']`` must consist of three tab-separated fields; the
    second is the repo id and the third the commit id. Malformed messages
    are logged and ignored. Only commits that have a parent and no
    ``second_parent_id`` are processed.

    For such a commit the handler diffs it against its parent, rewrites
    stored paths of renamed / moved entries, saves file-history and
    activity records, asks for the Keeper catalog entry of the repo to be
    generated, and optionally notifies the collab server.

    :param session: database session handed to the ``save_*`` helpers.
    :param msg: mapping with at least the keys ``'content'`` and ``'ctime'``.
    """
    elements = msg['content'].split('\t')
    if len(elements) != 3:
        logging.warning("got bad message: %s", elements)
        return

    repo_id = elements[1]
    commit_id = elements[2]

    # Try version 1 first and fall back to version 0.
    commit = commit_mgr.load_commit(repo_id, 1, commit_id)
    if commit is None:
        commit = commit_mgr.load_commit(repo_id, 0, commit_id)

    # TODO: maybe handle merge commit.
    if commit is not None and commit.parent_id and not commit.second_parent_id:

        parent = commit_mgr.load_commit(repo_id, commit.version, commit.parent_id)

        if parent is not None:
            differ = CommitDiffer(repo_id, commit.version, parent.root_id, commit.root_id,
                                  True, True)
            added_files, deleted_files, added_dirs, deleted_dirs, modified_files,\
                renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff()

            # Keep stored paths in sync with renames and moves. The last
            # argument is 0 for file entries and 1 for directory entries.
            if renamed_files or renamed_dirs or moved_files or moved_dirs:
                changer = ChangeFilePathHandler()
                for r_file in renamed_files:
                    changer.update_db_records(repo_id, r_file.path, r_file.new_path, 0)
                for r_dir in renamed_dirs:
                    changer.update_db_records(repo_id, r_dir.path, r_dir.new_path, 1)
                for m_file in moved_files:
                    changer.update_db_records(repo_id, m_file.path, m_file.new_path, 0)
                for m_dir in moved_dirs:
                    changer.update_db_records(repo_id, m_dir.path, m_dir.new_path, 1)
                changer.close_session()

            # Collect the users the repo is shared with, plus the owner.
            users = []
            org_id = get_org_id_by_repo_id(repo_id)
            if org_id > 0:
                users = seafile_api.org_get_shared_users_by_repo(org_id, repo_id)
                owner = seafile_api.get_org_repo_owner(repo_id)
            else:
                users = seafile_api.get_shared_users_by_repo(repo_id)
                owner = seafile_api.get_repo_owner(repo_id)

            if owner not in users:
                users = users + [owner]
            # NOTE: returning here also skips the catalog update and the
            # collab-server notification below.
            if not users:
                return

            time = datetime.datetime.utcfromtimestamp(msg['ctime'])
            if added_files or deleted_files or added_dirs or deleted_dirs or \
                    modified_files or renamed_files or moved_files or renamed_dirs or moved_dirs:

                # File history is optional; activity records are always saved.
                if appconfig.fh.enabled:
                    records = generate_filehistory_records(added_files, deleted_files,
                                    added_dirs, deleted_dirs, modified_files, renamed_files,
                                    moved_files, renamed_dirs, moved_dirs, commit, repo_id,
                                    parent, time)
                    save_file_histories(session, records)

                records = generate_activity_records(added_files, deleted_files,
                        added_dirs, deleted_dirs, modified_files, renamed_files,
                        moved_files, renamed_dirs, moved_dirs, commit, repo_id,
                        parent, users, time)

                save_user_activities(session, records)
            else:
                # An empty diff is recorded as a repo-rename activity.
                save_repo_rename_activity(session, commit, repo_id, parent, org_id, users, time)

            # NOTE(review): the source layout was collapsed; the statements
            # below are assumed to be nested under ``if parent is not None``
            # -- confirm against the upstream file.
            # TODO check: catalog entry update
            # KEEPER
            logging.info("REPO UPDATED EVENT repo_id: %s" % repo_id)
            logging.info("Trying to create/update keeper catalog entry for repo_id: %s..." % repo_id)
            # A truthy return value is treated as success.
            if bool(generate_catalog_entry_by_repo_id(repo_id)):
                logging.info("Success!")
            else:
                logging.error("Something went wrong...")

            if appconfig.enable_collab_server:
                send_message_to_collab_server(repo_id)
def diff_and_scan_content(self, task, client):
    """Scan the files changed between two commits of a repo and record hits.

    Steps:

    1. If ``task.last_commit_id`` equals ``task.new_commit_id`` only the
       timestamp of the matching ``ContentScanRecord`` is refreshed.
    2. Otherwise the new commit (version 1, falling back to 0) and, when a
       last commit id is given, the last commit are loaded and their trees
       diffed without folding directories. A missing last commit id means
       diffing against ``ZERO_OBJ_ID``.
    3. Existing ``ContentScanResult`` rows of the repo are re-pathed for
       renamed / moved entries and removed for deleted or modified files
       (modified files are scanned again below).
    4. Added and modified files accepted by ``self.should_scan_file`` are
       sent to ``client.scan``; every truthy ``dict`` result is stored as
       a new ``ContentScanResult``.
    5. The repo's ``ContentScanRecord`` is updated, or created when there
       was no last commit id.

    The database session is always closed, including on early return and
    when an exception propagates.

    :param task: object exposing ``repo_id``, ``last_commit_id`` and
        ``new_commit_id``.
    :param client: scanner exposing ``scan(content)``.
    """
    repo_id = task.repo_id
    last_commit_id = task.last_commit_id
    new_commit_id = task.new_commit_id

    edb_session = appconfig.session_cls()
    try:
        # repo not changed, update timestamp
        if last_commit_id == new_commit_id:
            q = edb_session.query(ContentScanRecord)
            q = q.filter(ContentScanRecord.repo_id == repo_id,
                         ContentScanRecord.commit_id == last_commit_id)
            q.update({"timestamp": self.dt})
            edb_session.commit()
            return

        # diff
        version = 1
        new_commit = commit_mgr.load_commit(repo_id, version, new_commit_id)
        if new_commit is None:
            version = 0
            new_commit = commit_mgr.load_commit(repo_id, version, new_commit_id)
        if not new_commit:
            logging.warning('Failed to load commit %s/%s', repo_id, new_commit_id)
            return

        last_commit = None
        if last_commit_id:
            last_commit = commit_mgr.load_commit(repo_id, version, last_commit_id)
            if not last_commit:
                logging.warning('Failed to load commit %s/%s', repo_id, last_commit_id)
                return

        new_root_id = new_commit.root_id
        last_root_id = last_commit.root_id if last_commit else ZERO_OBJ_ID

        differ = CommitDiffer(repo_id, version, last_root_id, new_root_id,
                              True, False)
        added_files, deleted_files, added_dirs, deleted_dirs, modified_files, \
            renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff_to_unicode()

        # Handle renamed, moved and deleted files.
        q = edb_session.query(ContentScanResult).filter(ContentScanResult.repo_id == repo_id)
        results = q.all()
        if results:
            # Exact-path checks use a set instead of rescanning every row.
            recorded_paths = set(row.path for row in results)
            path_pairs_to_rename = []

            # renamed dirs, then moved dirs: re-root every recorded path
            # that lives below the old directory.
            for dir_group in (renamed_dirs, moved_dirs):
                for changed_dir in dir_group:
                    old_prefix = changed_dir.path + '/'
                    prefix_len = len(old_prefix)
                    for row in results:
                        if row.path.startswith(old_prefix):
                            new_path = changed_dir.new_path + '/' + row.path[prefix_len:]
                            path_pairs_to_rename.append((row.path, new_path))

            # renamed files, then moved files
            for file_group in (renamed_files, moved_files):
                for changed_file in file_group:
                    if changed_file.path in recorded_paths:
                        path_pairs_to_rename.append(
                            (changed_file.path, changed_file.new_path))

            for old_path, new_path in path_pairs_to_rename:
                q = edb_session.query(ContentScanResult)
                q = q.filter(ContentScanResult.repo_id == repo_id,
                             ContentScanResult.path == old_path)
                q.update({"path": new_path})

            # Deleted files lose their records. Modified files are scanned
            # again and re-recorded later, so their previous records are
            # deleted now as well.
            paths_to_delete = []
            for file_group in (deleted_files, modified_files):
                for changed_file in file_group:
                    if changed_file.path in recorded_paths:
                        paths_to_delete.append(changed_file.path)

            for path in paths_to_delete:
                q = edb_session.query(ContentScanResult)
                q = q.filter(ContentScanResult.repo_id == repo_id,
                             ContentScanResult.path == path)
                q.delete()

            edb_session.commit()

        # scan added_files and modified_files by third-party API.
        files_to_scan = []
        files_to_scan.extend(added_files)
        files_to_scan.extend(modified_files)

        scan_results = []
        for f in files_to_scan:
            if not self.should_scan_file(f.path, f.size):
                continue
            # Load with the version detected for the commit above instead
            # of a hard-coded 1, so version-0 repos are read consistently.
            seafile_obj = fs_mgr.load_seafile(repo_id, version, f.obj_id)
            content = seafile_obj.get_content()
            if not content:
                continue

            result = client.scan(content)
            if result and isinstance(result, dict):
                scan_results.append({"path": f.path, "detail": result})
            else:
                logging.warning('Failed to scan %s:%s', repo_id, f.path)

        for item in scan_results:
            detail = json.dumps(item["detail"])
            new_record = ContentScanResult(repo_id, item["path"],
                                           appconfig.platform, detail)
            edb_session.add(new_record)

        a_count = len(scan_results)
        if a_count >= 1:
            logging.info('Found %d new illegal files.', a_count)

        # Update ContentScanRecord
        if last_commit_id:
            q = edb_session.query(ContentScanRecord).filter(ContentScanRecord.repo_id == repo_id)
            q.update({"commit_id": new_commit_id, "timestamp": self.dt})
        else:
            new_record = ContentScanRecord(repo_id, new_commit_id, self.dt)
            edb_session.add(new_record)

        edb_session.commit()
    finally:
        # Single exit point for the session: covers the early returns above
        # and any exception raised while diffing, scanning or writing.
        edb_session.close()