def _mark_as_user_data(self, path_spec):
    """
    Mark every file matching path_spec as UserData for the current system.

    path_spec can be a file or a directory. Two LIKE lookups are made, one
    for path_spec itself and one for path_spec + '/%', which together act
    like the regex:
    path_spec + r'|' + path_spec + r'/.*'

    Each match gets its origin set to FileOrigin.UserData.name and is
    flushed, committed and logged one file at a time.
    """
    current_system: System = State.get_system()
    db: Session = State.get_db_session()

    # Gather the exact match first, then everything below the directory,
    # before any row is modified.
    matches: List[FileDetail] = []
    for pattern in (path_spec, path_spec + '/%'):
        matches.extend(
            db.query(FileDetail).filter(
                FileDetail.system_id == current_system.system_id,
                FileDetail.file_location.like(pattern),
            ).all())

    for detail in matches:
        # One UPDATE per file, selected by its file_detail_id.
        statement = update(FileDetail).where(
            FileDetail.file_detail_id == detail.file_detail_id,
        ).values(origin=FileOrigin.UserData.name)
        db.execute(statement)
        db.flush()
        db.commit()
        self._log('Marked as UserData ' + detail.file_location)
def _mark_as_content(self, path_spec):
    """
    Link matching files to the package and mark them as package content.

    path_spec can be a file or a directory. Two LIKE lookups are made, one
    for path_spec itself and one for path_spec + '/%', which together act
    like the regex:
    path_spec + r'|' + path_spec + r'/.*'

    Each match gets origin FileOrigin.PackageContent.name and the
    rpm_info_id of self._rpm_info, and is flushed, committed and logged
    one file at a time.
    """
    current_system: System = State.get_system()
    db: Session = State.get_db_session()

    # Gather the exact match first, then everything below the directory,
    # before any row is modified.
    matches: List[FileDetail] = []
    for pattern in (path_spec, path_spec + '/%'):
        matches.extend(
            db.query(FileDetail).filter(
                FileDetail.system_id == current_system.system_id,
                FileDetail.file_location.like(pattern),
            ).all())

    for detail in matches:
        # One UPDATE per file, selected by its file_detail_id.
        statement = update(FileDetail).where(
            FileDetail.file_detail_id == detail.file_detail_id,
        ).values(
            origin=FileOrigin.PackageContent.name,
            rpm_info_id=self._rpm_info.rpm_info_id,
        )
        db.execute(statement)
        db.flush()
        db.commit()
        self._log('Linked to package ' + self._package_name + ': '
                  + detail.file_location)
        self._log('Marked as PackageContent ' + detail.file_location)
def refresh_mviews(self):
    """
    Refresh the resolved_symlinks materialized view.

    Logs a message before and after issuing the REFRESH statement on the
    shared database session.
    """
    refresh_sql = """ REFRESH MATERIALIZED VIEW resolved_symlinks WITH DATA; """
    log.info("Refreshing the symlink materialized view.")
    State.get_db_session().execute(refresh_sql)
    log.info("Symlink materialized view refresh complete.")
def unknown_count() -> int:
    """
    Return how many FileDetail rows of the current system still have
    their origin set to FileOrigin.UnknownSource.
    """
    current_system: System = State.get_system()
    db: Session = State.get_db_session()
    unknown_files = db.query(FileDetail).filter(
        FileDetail.system_id == current_system.system_id,
        FileDetail.origin == FileOrigin.UnknownSource.name,
    )
    return unknown_files.count()
def __exit__(self, exception_type, exception_value, traceback):
    """
    Finish the transaction when the ``with`` block ends.

    Commits when no exception value was passed in, otherwise rolls back.
    Always returns False, so an exception raised inside the block keeps
    propagating to the caller.
    """
    if not exception_value:
        log.info('Committing transaction.')
        State.get_db_session().commit()
        log.info('Transaction committed.')
        return False

    log.error('Rolling back transaction.')
    State.get_db_session().rollback()
    return False
def path_exists(path_spec: str) -> bool:
    """
    Tell whether path_spec is recorded for the current system.

    Returns True when a FileDetail row of the current system has exactly
    this file_location, False otherwise.
    """
    current_system: System = State.get_system()
    db: Session = State.get_db_session()
    match: FileDetail = db.query(FileDetail).filter(
        FileDetail.system_id == current_system.system_id,
        FileDetail.file_location == path_spec,
    ).one_or_none()
    return match is not None
def _clear_existing(self):
    """
    Reset file_exists to False on every RpmDetail row of self.system.

    The change is flushed and committed, analyze_database() is run, and
    the number of affected rows is logged.
    """
    reset_flags = update(RpmDetail).values(file_exists=False).where(
        RpmDetail.system_id == self.system.system_id)
    outcome = State.get_db_session().execute(reset_flags)
    State.get_db_session().flush()
    State.get_db_session().commit()
    self.analyze_database()
    log.info(f"Cleared file_exists attribute for {outcome.rowcount} rows.")
def origin_unknown(path_spec: str) -> bool:
    """
    Return True if the file's FileOrigin is UnknownSource, else False.

    The lookup is an exact match on file_location within the current
    system; a path with no FileDetail row at all also yields False.
    """
    current_system: System = State.get_system()
    db: Session = State.get_db_session()
    match: FileDetail = db.query(FileDetail).filter(
        FileDetail.system_id == current_system.system_id,
        FileDetail.file_location == path_spec,
        FileDetail.origin == FileOrigin.UnknownSource.name,
    ).one_or_none()
    return match is not None
def fetch_file_detail(rpm_detail: RpmDetail) -> FileDetail:
    """
    Load a FileDetail linked to rpm_detail on the current system.

    FileDetail is joined through RpmFileDetailLink to RpmDetail and the
    first row of the result is returned.

    Raises:
        ValueError: when the query returns no rows.
    """
    session = State.get_db_session()
    current_system_id = State.get_system().system_id
    linked = session.query(FileDetail).join(RpmFileDetailLink).join(RpmDetail)
    matches = linked.filter(
        (RpmDetail.system_id == current_system_id)
        & (RpmDetail.rpm_detail_id == rpm_detail.rpm_detail_id),
    ).all()
    if not matches:
        raise ValueError("FileDetail cannot be loaded.")
    return matches[0]
def system_OS(distro: OSDistro) -> OSDistro:
    """
    Return the OS distribution of the current system.

    Args:
        distro: Currently unused; the result depends only on the system
            returned by State.get_system().

    Returns:
        OSDistro.CentOS when the system's os_distro equals 'centos',
        otherwise OSDistro.Other.
    """
    # NOTE(review): the signature used to declare ``-> bool`` although an
    # OSDistro member has always been returned. The annotation now matches
    # the body; if a boolean "is the system running `distro`?" check was
    # intended, callers need to be reviewed before changing the return.
    system: System = State.get_system()
    if system.os_distro == 'centos':
        return OSDistro.CentOS
    return OSDistro.Other
def _get_files_to_create(
        self,
        origins: Tuple[str] = None,
        file_prefix: str = None,
) -> Iterable[Tuple[FileDetail, FileStorage]]:
    """
    Yield (FileDetail, FileStorage) pairs for files of self.system.

    Only rows with file_type "F" are considered, ordered by file_location.

    Args:
        origins: when given and non-empty, keep only rows whose origin is
            one of these values.
        file_prefix: when given and non-empty, keep only rows whose
            file_location starts with this prefix.
    """
    session = State.get_db_session()

    # Mandatory conditions first; optional ones are added only when set.
    conditions = [
        System.system_id == self.system.system_id,
        FileDetail.file_type == "F",
    ]
    if origins:
        conditions.append(FileDetail.origin.in_(origins))
    if file_prefix:
        conditions.append(FileDetail.file_location.startswith(file_prefix))

    pairs = session.query(FileDetail, FileStorage)
    pairs = pairs.join(System).join(FileDetailStorageLink).join(FileStorage)
    pairs = pairs.filter(*conditions).order_by(FileDetail.file_location)
    yield from pairs.yield_per(10)
def package_installed(package_name: str) -> List[RpmInfo]:
    """
    Look up a package by name on the source system.

    Queries RpmInfo for rows of the current system whose name equals
    package_name, exactly as given, and returns them as a list. The list
    is empty when nothing matches.
    """
    current_system: System = State.get_system()
    db: Session = State.get_db_session()
    by_name = db.query(RpmInfo).filter(
        RpmInfo.system_id == current_system.system_id,
        RpmInfo.name == package_name,
    )
    return by_name.all()
def path_rehydrate_as_bytes(path_spec) -> BytesIO:
    """
    Return the contents of a modified file.

    An SFTP client is taken from the current SSH session, wrapped in
    SftpWrapper, and asked for a file handle on the remote path path_spec.
    """
    sftp_client = State.get_ssh_session().get_sftp_client()
    with SftpWrapper(sftp_client) as sftp:
        return sftp.get_file_handle(remote_name=path_spec)
def __init__(self, **kwargs):
    """
    Resolve the System this object works on.

    Keyword Args:
        name: forwarded to State.get_system as ``name``.
        gather: forwarded to State.get_system as ``gather``; defaults to
            False. Only when it is falsy is ``self.system_id`` set from
            the loaded system.
    """
    gather = kwargs.get("gather", False)
    self.system: System = State.get_system(
        name=kwargs.get("name"),
        gather=gather,
    )
    if gather:
        return
    self.system_id: int = self.system.system_id
def get_directory_contents(path_spec: str) -> List[FileDetail]:
    """
    Return the FileDetail rows located directly inside a directory.

    path_spec is given a trailing path separator if it lacks one. Rows of
    the current system are selected whose file_location matches
    "<directory>%" and whose remainder after the directory prefix contains
    no further path separator.
    """
    current_system: System = State.get_system()
    db: Session = State.get_db_session()

    if path_spec.endswith(os.path.sep):
        directory = path_spec
    else:
        directory = path_spec + os.path.sep
    pattern = directory + "%"

    # len(pattern) is len(directory) + 1, which with substr's 1-based
    # positions is the first character after the directory prefix.
    remainder = func.substr(FileDetail.file_location, len(pattern))
    return db.query(FileDetail).filter(
        FileDetail.system_id == current_system.system_id,
        FileDetail.file_location.like(pattern),
        func.strpos(remainder, os.path.sep) == 0,
    ).all()
def process_modified_files(self, modified_rpm_files: ResultProxy) -> int:
    """
    Flag every entry of modified_rpm_files as changed.

    Existing changed flags are cleared first via self.file_difference.
    Each entry then gets file_changed = True and is added to the database
    session, which is flushed once at the end.

    Returns:
        The number of entries that were flagged.
    """
    self.file_difference.clear_changed_flags()
    log.info("Flagging modified files")

    flagged = 0
    for flagged, entry in enumerate(modified_rpm_files, start=1):
        entry.file_changed = True
        State.get_db_session().add(entry)

    log.info(f"Flagged {flagged} files as modified.")
    State.get_db_session().flush()
    return flagged
def path_modified(path_spec: str) -> bool:
    """
    Check if a source system file was modified after package installation.

    path_spec is a full pathname on the source file system. It is looked
    up in RpmDetail for the current system and that row's file_changed
    value is returned.

    Raises:
        RpmFileNotFound: when no RpmDetail row matches path_spec.
    """
    current_system: System = State.get_system()
    db: Session = State.get_db_session()
    entry: RpmDetail = db.query(RpmDetail).filter(
        RpmDetail.system_id == current_system.system_id,
        RpmDetail.file_location == path_spec,
    ).one_or_none()
    if entry:
        return entry.file_changed
    raise RpmFileNotFound("Unable to locate file in RpmDetails")
def _flag_existing(self):
    """
    Flag linked RPM entries as existing and their files as PackageInstalled.

    Two UPDATE statements are run for self.system_id, each correlated with
    RpmFileDetailLink: RpmDetail rows that have a link get
    file_exists=True, then FileDetail rows that have a link get origin
    FileOrigin.PackageInstalled.name. The affected row count of each is
    logged.
    """
    rpm_has_link = (
        (RpmDetail.system_id == self.system_id)
        & (RpmDetail.rpm_detail_id == RpmFileDetailLink.rpm_detail_id)
        & RpmFileDetailLink.file_detail_id.isnot(None))
    flag_rpms = update(RpmDetail).where(rpm_has_link).values(
        file_exists=True)
    outcome = State.get_db_session().execute(flag_rpms)
    log.info(f"{outcome.rowcount} files flagged as existing.")

    file_has_link = (
        (FileDetail.system_id == self.system_id)
        & (FileDetail.file_detail_id == RpmFileDetailLink.file_detail_id)
        & RpmFileDetailLink.rpm_detail_id.isnot(None))
    flag_files = update(FileDetail).where(file_has_link).values(
        origin=FileOrigin.PackageInstalled.name)
    outcome = State.get_db_session().execute(flag_files)
    log.info(
        f"{outcome.rowcount} files flagged as PackageInstalled.")
def run_rules(self, p):
    """
    Fire every rule registered for pass p.

    Before and after the rules run, a line naming the current system and
    the pass is logged, followed by the current count of unmarked files.
    """
    system_name = State.get_system().name

    def report(prefix):
        # Same two log lines at both ends of the pass.
        self._log(prefix + system_name + ' Pass ' + p.name)
        self._log('Unmarked files: %d' % hutils.unknown_count())

    report('Start firing rules for system ')
    for rule in self._rules[p]:
        rule.fire()
    report('Finished firing rules for system ')
def _get_user_directories_to_create(self):
    """
    Yield the directory rows of self.system that hold user content.

    Selects FileDetail rows with file_type "D" whose origin is one of
    get_user_content_names(), ordered by file_location.
    """
    session = State.get_db_session()
    user_directories = session.query(FileDetail).join(System).filter(
        System.system_id == self.system.system_id,
        FileDetail.file_type == "D",
        FileDetail.origin.in_(get_user_content_names()),
    ).order_by(FileDetail.file_location)
    yield from user_directories.yield_per(10)
def test_path_modified(test_database):
    """path_modified is truthy once file_changed is set for /etc/profile."""
    flag_profile_changed = """ update iac.rpm_detail set file_changed = TRUE where file_location = '/etc/profile' and system_id = 1; """
    db = State.get_db_session()
    db.execute(flag_profile_changed)
    assert heuristic_utils.path_modified("/etc/profile")
    # Undo the manual update so later tests see the original data.
    db.rollback()
def _get_detail(self,
                rpm_detail: RpmDetail = None,
                symlink: ResolvedSymlinks = None,
                custom_path: str = None):
    """
    Find the FileDetail row stored under a single path.

    The path is chosen in this order of precedence:
      1. symlink.resolved_location, when a symlink is given;
      2. custom_path, when it is non-empty;
      3. rpm_detail.file_location.

    Args:
        rpm_detail: RPM entry whose file_location is the fallback path.
        symlink: resolved symlink row whose target should be looked up.
        custom_path: explicit path to look up instead of the RPM's own.

    Returns:
        The matching FileDetail, or None when no row has that
        file_location.
    """
    if symlink:
        # ResolvedSymlinks rows expose their target as resolved_location,
        # the attribute used by every other query on this model here; the
        # former ``resolved_path`` did not match that.
        path = symlink.resolved_location
    else:
        path = custom_path or rpm_detail.file_location
    return State.get_db_session().query(FileDetail).filter(
        FileDetail.file_location == path).one_or_none()
def _run_link_match_update(self):
    """
    Link RPM entries to the files their path resolves to via a symlink.

    Selects (rpm_detail_id, file_detail_id) pairs for self.system_id where
    a ResolvedSymlinks row has the RPM entry's system and file_location, a
    FileDetail of the same system sits at its resolved_location, and no
    RpmFileDetailLink joins the pair yet. The pairs are inserted into
    RpmFileDetailLink, then the session is flushed and committed and
    analyze_database() is run.

    Returns:
        The rowcount reported for the INSERT.
    """
    rpm: RpmDetail = aliased(RpmDetail)
    target: FileDetail = aliased(FileDetail)
    link: RpmFileDetailLink = aliased(RpmFileDetailLink)

    symlink_for_rpm = (
        (ResolvedSymlinks.system_id == rpm.system_id)
        & (ResolvedSymlinks.file_location == rpm.file_location))
    file_at_target = (
        (ResolvedSymlinks.system_id == target.system_id)
        & (target.file_location == ResolvedSymlinks.resolved_location))
    existing_link = (
        (link.file_detail_id == target.file_detail_id)
        & (link.rpm_detail_id == rpm.rpm_detail_id))

    unlinked_pairs = State.get_db_session().query(
        rpm.rpm_detail_id,
        target.file_detail_id,
    ).join(
        ResolvedSymlinks, symlink_for_rpm,
    ).join(
        target, file_at_target,
    ).outerjoin(
        link, existing_link,
    ).filter(
        rpm.system_id == self.system_id,
        # The outer join produced no link row, so the pair is still new.
        link.rpm_file_detail_link_id.is_(None),
    )

    statement = insert(RpmFileDetailLink).from_select(
        [rpm.rpm_detail_id, target.file_detail_id],
        unlinked_pairs,
    )
    outcome = State.get_db_session().execute(statement)
    State.get_db_session().flush()
    State.get_db_session().commit()
    self.analyze_database()
    return outcome.rowcount
def _run_update(self):
    """
    Link RPM entries to files stored under the very same path.

    Selects (rpm_detail_id, file_detail_id) pairs for self.system where
    RpmDetail and FileDetail share system_id and file_location and no
    RpmFileDetailLink joins the pair yet. The pairs are inserted into
    RpmFileDetailLink and the count is logged, then the session is flushed
    and committed and analyze_database() is run.

    Returns:
        The rowcount reported for the INSERT.
    """
    rpm: RpmDetail = aliased(RpmDetail)
    target: FileDetail = aliased(FileDetail)
    link: RpmFileDetailLink = aliased(RpmFileDetailLink)

    same_path = (
        (rpm.system_id == target.system_id)
        & (rpm.file_location == target.file_location))
    existing_link = (
        (link.file_detail_id == target.file_detail_id)
        & (link.rpm_detail_id == rpm.rpm_detail_id))

    unlinked_pairs = State.get_db_session().query(
        rpm.rpm_detail_id,
        target.file_detail_id,
    ).join(
        target, same_path,
    ).outerjoin(
        link, existing_link,
    ).filter(
        rpm.system_id == self.system.system_id,
        # The outer join produced no link row, so the pair is still new.
        link.rpm_file_detail_link_id.is_(None),
    )

    statement = insert(RpmFileDetailLink).from_select(
        [rpm.rpm_detail_id, target.file_detail_id],
        unlinked_pairs,
    )
    outcome = State.get_db_session().execute(statement)
    log.debug(f"{outcome.rowcount} files linked.")
    State.get_db_session().flush()
    State.get_db_session().commit()
    self.analyze_database()
    return outcome.rowcount
def _map_rpm_to_file(self, rpm_detail: RpmDetail = None):
    """
    Find the FileDetail that corresponds to an RPM file entry.

    Three lookups are tried in turn and the first hit is returned:
      1. the RPM entry's own file_location;
      2. a ResolvedSymlinks row recorded for exactly that location;
      3. every directory symlink (target_type 'D') whose file_location
         prefixes the RPM path, with the prefix swapped for the symlink's
         resolved_location.

    Raises:
        ValueError: when none of the lookups finds a FileDetail.
    """
    # Happy path: the file is stored under the same name, no symlinks.
    direct_hit = self._get_detail(rpm_detail)
    if direct_hit:
        return direct_hit

    # Next, a symlink recorded for exactly this location.
    symlink = State.get_db_session().query(ResolvedSymlinks).filter(
        ResolvedSymlinks.file_location == rpm_detail.file_location,
    ).one_or_none()
    if symlink:
        via_symlink = self._get_detail(rpm_detail=rpm_detail,
                                       symlink=symlink)
        if via_symlink:
            return via_symlink

    # No easy match here: brute force through the directory symlinks.
    directory_links = State.get_db_session().query(ResolvedSymlinks).filter(
        ResolvedSymlinks.target_type == 'D').all()
    for link in directory_links:
        if not rpm_detail.file_location.startswith(link.file_location):
            continue
        remainder = rpm_detail.file_location[len(link.file_location):]
        candidate = self._get_detail(
            rpm_detail=rpm_detail,
            custom_path=f'{link.resolved_location}{remainder}',
        )
        if candidate:
            return candidate

    # TODO: What to do here? :)
    raise ValueError('cant find file details for rpm_id '
                     f'{rpm_detail.rpm_info}. {rpm_detail.file_location}')
def main():
    """
    Interactively print the origin of files on a system.

    The system name comes from the parsed command-line arguments and the
    path is read from standard input. A path ending in the path separator
    is turned into a LIKE prefix pattern; any other path is passed to LIKE
    as given. Every match is printed with its origin.
    """
    system_name = utils.os.GetArguments().parse().name
    print('Path to analyze:')
    path = input()
    print('OK. Examining ' + path + ' on ' + system_name)

    system: System = State.get_system(name=system_name)
    session: Session = State.get_db_session()

    pattern = path + "%" if path.endswith(os.path.sep) else path
    matches: List[FileDetail] = session.query(FileDetail).filter(
        FileDetail.system_id == system.system_id,
        FileDetail.file_location.like(pattern),
    ).all()
    for detail in matches:
        print(detail.file_location + ' is ' + detail.origin)
def store_system_info(self, **kwargs):
    """
    Create or update the System row identified by kwargs["name"].

    An existing row with that name is reused; otherwise a new System is
    created. Its attributes are then filled from kwargs, and the row is
    added to the session and flushed.

    Keyword Args:
        name: system name (required).
        hostname: defaults to ``name``.
        username: defaults to the current login name.
        key_file: optional.
        use_tty: defaults to False.
        port: defaults to 22.
        remote_hostname: stored as remote_name (required).
        kernel_version: required.
        distro, major, minor, revision: optional OS version details.

    Returns:
        The System object that was stored.

    Raises:
        KeyError: when a required keyword argument is missing.
    """
    try:
        system = State.get_db_session().query(System).filter(
            System.name == kwargs["name"]).one()
    except NoResultFound:
        system = System(name=kwargs["name"])

    system.hostname = kwargs.get("hostname", kwargs["name"])
    # Ask the OS for the login name only when no username was supplied;
    # os.getlogin() can fail when there is no controlling terminal.
    if "username" in kwargs:
        system.username = kwargs["username"]
    else:
        system.username = os.getlogin()
    system.key_file = kwargs.get("key_file")
    system.use_tty = kwargs.get("use_tty", False)
    system.port = kwargs.get("port", 22)
    # These assignments must not end in a comma: a trailing comma would
    # store a one-element tuple instead of the value itself.
    system.remote_name = kwargs["remote_hostname"]
    system.kernel_version = kwargs["kernel_version"]
    system.os_distro = kwargs.get("distro")
    system.os_major_ver = kwargs.get("major")
    system.os_minor_ver = kwargs.get("minor")
    system.os_revision = kwargs.get("revision")

    State.get_db_session().add(system)
    State.get_db_session().flush()
    return system
def _run(self):
    """
    Flag yum configuration files of unknown origin as UserData.

    The direct contents of /etc/yum and /etc/yum.repos.d are inspected;
    every record whose origin is still FileOrigin.UnknownSource is
    switched to FileOrigin.UserData. The session is committed at the end.
    """
    session = State.get_db_session()
    yum_directories = ("/etc/yum", "/etc/yum.repos.d")
    for directory in yum_directories:
        for record in hutils.get_directory_contents(path_spec=directory):
            log.debug(
                f"Yum configuration file name {record.file_location}.")
            if record.origin != FileOrigin.UnknownSource.name:
                # Already classified; leave it untouched.
                continue
            log.debug(f"Flagging {record.file_location} as UserData.")
            record.origin = FileOrigin.UserData.name
            session.add(record)
    session.commit()
def _add_packages(self):
    """
    Append one install task per package of self.system to self._yaml.

    Packages are read from RpmInfo ordered by installation_date. Each task
    is an OrderedDict with a "name" and a "package" entry, the latter
    built by self._get_install().
    """
    log.debug("entering _add_packages")
    session = State.get_db_session()

    # gpg-pubkey entries are fake packages and are left out, see
    # https://unix.stackexchange.com/questions/190203/what-are-gpg-pubkey-packages
    real_packages = session.query(RpmInfo).join(System).filter(
        System.system_id == self.system.system_id,
        RpmInfo.name != "gpg-pubkey",
    ).order_by(RpmInfo.installation_date).yield_per(10)

    for package in real_packages:
        self._yaml.append(OrderedDict(
            name=f"Install {package.name}",
            package=self._get_install(package.name),
        ))
    log.debug("exiting _add_packages")
def _run_directory_match_update(self):
    """
    Link RPM entries to files reached through a symlinked directory.

    For self.system_id, an RPM path is matched with a directory symlink
    (target_type "D") whose file_location is a strictly shorter prefix of
    it. That prefix is replaced by the symlink's resolved_location and a
    FileDetail of the same system is looked up at the resulting path.
    Pairs that are already linked, and files of type "S", are skipped. The
    distinct remaining pairs are inserted into RpmFileDetailLink, then the
    session is flushed and committed and analyze_database() is run.

    Returns:
        The rowcount reported for the INSERT.
    """
    rpm: RpmDetail = aliased(RpmDetail)
    target: FileDetail = aliased(FileDetail)
    link: RpmFileDetailLink = aliased(RpmFileDetailLink)

    prefix_length = func.length(ResolvedSymlinks.file_location)

    # The RPM path must start with, and be longer than, the symlink path.
    directory_prefixes_rpm = (
        (rpm.system_id == ResolvedSymlinks.system_id)
        & (ResolvedSymlinks.target_type == "D")
        & (func.length(rpm.file_location) > prefix_length)
        & (ResolvedSymlinks.file_location == func.substr(
            rpm.file_location, 1, prefix_length)))

    # Swap the symlink prefix for the directory it resolves to.
    resolved_rpm_path = (
        ResolvedSymlinks.resolved_location
        + func.substr(rpm.file_location, prefix_length + 1))
    file_at_resolved_path = (
        (target.system_id == ResolvedSymlinks.system_id)
        & (target.file_location == resolved_rpm_path))

    existing_link = (
        (link.file_detail_id == target.file_detail_id)
        & (link.rpm_detail_id == rpm.rpm_detail_id))

    unlinked_pairs = State.get_db_session().query(
        rpm.rpm_detail_id,
        target.file_detail_id,
    ).join(
        ResolvedSymlinks, directory_prefixes_rpm,
    ).join(
        target, file_at_resolved_path,
    ).outerjoin(
        link, existing_link,
    ).filter(
        (rpm.system_id == self.system_id)
        & link.rpm_file_detail_link_id.is_(None)
        & (func.coalesce(target.file_type, "") != "S"),
    ).distinct()

    statement = insert(RpmFileDetailLink).from_select(
        [rpm.rpm_detail_id, target.file_detail_id],
        unlinked_pairs,
    )
    outcome = State.get_db_session().execute(statement)
    State.get_db_session().flush()
    State.get_db_session().commit()
    self.analyze_database()
    return outcome.rowcount