class CTERAPath:

    def __init__(self, relativepath, basepath):
        self.basepath = PurePosixPath(basepath)
        self.relativepath = PurePosixPath(relativepath)
        if self.basepath.joinpath(self.relativepath) == self.relativepath:
            # joinpath() discards the base when given an absolute path,
            # so equality here means relativepath was absolute
            raise InputError(
                'You must specify a relative path. Omit leading / characters',
                str(self.relativepath),
                re.sub(r'^/*', '', str(self.relativepath)))

    def name(self):
        return self.relativepath.name

    def parent(self):
        return CTERAPath(str(self.relativepath.parent), str(self.basepath))

    def fullpath(self):
        return str(self.basepath.joinpath(self.relativepath))

    def encoded_fullpath(self):
        return quote(self.fullpath())

    def encoded_parent(self):
        return quote(str(self.parent()))

    def joinpath(self, path):
        return CTERAPath(str(self.relativepath.joinpath(path)), str(self.basepath))

    def parts(self):
        return self.relativepath.parts

    def __str__(self):
        return self.fullpath()
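# A minimal usage sketch of CTERAPath (assumes the imports the class relies
# on: PurePosixPath, re, urllib.parse.quote, and an InputError exception
# type; the '/ServicesPortal/webdav' base path is illustrative, not from
# the source).
path = CTERAPath('docs/report.pdf', '/ServicesPortal/webdav')
print(path.fullpath())          # /ServicesPortal/webdav/docs/report.pdf
print(path.parent())            # /ServicesPortal/webdav/docs
print(path.encoded_fullpath())  # same path, URL-quoted
# CTERAPath('/docs/report.pdf', '/ServicesPortal/webdav')  # raises InputError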
def resource_path(resource: Resource) -> PurePosixPath:
    """Return a path representation of resource.

    The resource should be location-aware, meaning it has ``name`` and
    ``parent`` attributes.

    - Always with a leading slash
    - Never with a trailing slash
    - No ``index`` at the end of a collection

    Args:
        resource: The target to get the path for.

    Returns:
        A PurePosixPath representation.
    """
    # Bail out quickly if we are the root or in the root
    root_path = PurePosixPath("/")
    if resource.parent is None:
        return root_path
    elif resource.parent.parent is None and resource.name is not None:
        return root_path / resource.name

    lineage = list(parents(resource))
    lineage.append(resource)

    # Get the names for each part, then join with slashes
    parts = [
        PurePosixPath(p.name) if p.name is not None else PurePosixPath("/")
        for p in lineage
        if p
    ]
    path = root_path.joinpath(*parts)
    return path
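# A hedged sketch exercising resource_path with hand-built location-aware
# objects: SimpleNamespace stands in for the Resource type, and the
# parents() helper below is an assumed stand-in that yields ancestors from
# the root down, matching how the function consumes it.
from types import SimpleNamespace

def parents(resource):
    chain = []
    node = resource.parent
    while node is not None:
        chain.append(node)
        node = node.parent
    return reversed(chain)

root = SimpleNamespace(name=None, parent=None)
docs = SimpleNamespace(name="docs", parent=root)
page = SimpleNamespace(name="about", parent=docs)

print(resource_path(docs))  # /docs
print(resource_path(page))  # /docs/about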
def process_page(url: str, storage: str):
    r = requests.get(url)
    if r.status_code != 200:
        return
    page = BeautifulSoup(r.text, 'html.parser')
    process_element(page, url)
    # extract the URL path and strip leading slashes so it stays relative
    path = PurePosixPath(urlparse(url).path.lstrip('/'))
    # default to index.html when the URL names a directory
    if path.name == '':
        path = path.joinpath('index.html')
    # prepend the storage directory
    path = Path(storage).joinpath(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, 'wb') as f:
        f.write(str(page).encode('utf-8'))
async def create_file_async(self: 'LocalFileStorage', origin_file_name: str,
                            file_path: Path, to_bucket: str) -> None:
    path = PurePosixPath()
    s3_file_path = path.joinpath(to_bucket, origin_file_name)
    async with await self.__s3._connect():
        await self.__s3._put_file(lpath=file_path, rpath=s3_file_path)
def getPackageUploadPathForRelease(
        self, dbfsBasePath: PurePosixPath) -> PurePosixPath:
    versionDirName = (self.__packageName + '/'
                      + self.__dateTime.strftime('%Y-%m-%d_%H-%M-%S')
                      + '_' + self.__randomString)
    return dbfsBasePath.joinpath(versionDirName).joinpath(
        self.getPackageFilename())
def _possible_names(cls, package: PackageInfo) -> List[str]:
    names = list()
    for url_str in package.get_urls():
        url = urlparse(url_str)
        host = url.netloc
        if not host.endswith('github.com'):
            continue
        path = PurePosixPath(url.path)
        parts = path.parts
        if path.is_absolute():
            parts = parts[1:]
        if len(parts) >= 2:
            # Get the first 2 path components without extensions
            # this should handle:
            #  - owner/project
            #  - owner/project.git
            #  - owner/project/releases
            name = PurePosixPath(parts[0])
            # strip off .git if the project name contains it
            # don't just strip off any ext because "." is valid
            name_project = parts[1]
            if name_project.endswith('.git'):
                name_project = name_project[:-len('.git')]
            name = name.joinpath(name_project)
            names.append(str(name))
    return names
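# A standalone sketch of the same owner/project extraction, showing the
# expected result for each URL shape the comment above lists (the URLs are
# illustrative).
from pathlib import PurePosixPath
from urllib.parse import urlparse

for url_str in ('https://github.com/psf/requests',
                'https://github.com/psf/requests.git',
                'https://github.com/psf/requests/releases'):
    parts = PurePosixPath(urlparse(url_str).path).parts[1:]  # drop leading "/"
    project = parts[1][:-len('.git')] if parts[1].endswith('.git') else parts[1]
    print(PurePosixPath(parts[0]).joinpath(project))  # psf/requests each time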
def test_bucket_read_write():
    raw_data_source = get_sample_c_cells_datasource()
    bucket_fs = get_test_output_bucket_fs()

    precomp_path = PurePosixPath("c_cells_1.precomputed")
    sink = PrecomputedChunksScaleSink(
        info_dir=precomp_path,
        filesystem=bucket_fs,
        num_channels=raw_data_source.shape.c,
        scale=PrecomputedChunksScale(
            key=PurePosixPath("exported_data"),
            size=(raw_data_source.shape.x, raw_data_source.shape.y,
                  raw_data_source.shape.z),
            chunk_sizes=tuple([
                (raw_data_source.tile_shape.x, raw_data_source.tile_shape.y,
                 raw_data_source.tile_shape.z)
            ]),
            encoding=RawEncoder(),
            voxel_offset=(raw_data_source.location.x, raw_data_source.location.y,
                          raw_data_source.location.z),
            resolution=raw_data_source.spatial_resolution),
        dtype=raw_data_source.dtype,
    )
    sink_writer = sink.create()
    assert not isinstance(sink_writer, Exception)
    assert bucket_fs.exists(precomp_path.joinpath("info").as_posix())
    assert not bucket_fs.exists(precomp_path.joinpath("i_dont_exist").as_posix())

    with ProcessPoolExecutor() as executor:
        _ = list(
            executor.map(partial(_write_data, sink_writer=sink_writer),
                         raw_data_source.roi.get_datasource_tiles()))

    data_proxy_source = PrecomputedChunksDataSource(
        path=precomp_path,
        filesystem=bucket_fs,
        resolution=raw_data_source.spatial_resolution)
    retrieved_data = data_proxy_source.retrieve()
    assert np.all(retrieved_data.raw("yxc") == raw_data_source.retrieve().raw("yxc"))
def _resolve_relative_path(self, path):
    path = PurePosixPath(path)
    if path.parts and path.parts[0] == "~":
        ctx_path = PurePosixPath(shell_context.HOME)
        path = PurePosixPath(*path.parts[1:])  # drop the "~" itself
    else:
        ctx_path = PurePosixPath(shell_context.PWD)
    assert ctx_path.anchor
    # if path starts with /, joinpath ignores ctx_path automatically
    return ctx_path.joinpath(path)
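# The comment above relies on a pathlib rule worth seeing in isolation:
# joinpath() with an absolute argument discards everything before it.
# (The paths below are illustrative.)
from pathlib import PurePosixPath

ctx = PurePosixPath("/home/alice")
print(ctx.joinpath("notes/todo.txt"))  # /home/alice/notes/todo.txt
print(ctx.joinpath("/etc/hosts"))      # /etc/hosts  (ctx is ignored)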
def collect_files(template_dir, url_base):
    baseurl = PurePosixPath(url_base)
    for dirname, _, files in os.walk(template_dir):
        rel_dirname = PurePosixPath(dirname).relative_to(template_dir)
        for filename in files:
            template_path = path.join(dirname, filename)
            url = baseurl.joinpath(rel_dirname, filename)
            with open(template_path, "r", encoding="utf8") as f:
                yield str(url), file_contents(f)
def to_abs_path(path):
    global SHELL
    path = PurePosixPath(path)
    if path.parts and path.parts[0] == "~":
        ctx_path = PurePosixPath(SHELL.HOME)
        path = PurePosixPath(*path.parts[1:])  # drop the "~" itself
    else:
        ctx_path = PurePosixPath(SHELL.PWD)
    assert ctx_path.anchor
    # if path starts with /, joinpath ignores ctx_path automatically
    return ctx_path.joinpath(path)
def __init__(self, *, filesystem: JsonableFilesystem, outer_path: PurePosixPath,
             inner_path: PurePosixPath, attributes: N5DatasetAttributes):
    super().__init__(
        dtype=attributes.dataType,
        tile_shape=attributes.blockSize,
        interval=attributes.dimensions.to_interval5d(),
    )
    self.outer_path = outer_path
    self.inner_path = inner_path
    # strip the leading "/" so joinpath appends instead of replacing outer_path
    self.full_path = outer_path.joinpath(inner_path.as_posix().lstrip("/"))
    self.attributes = attributes
    self.filesystem = filesystem
def glob(self, path: PurePosixPath, pattern: str) -> list[PurePosixPath]:
    glob_pattern = path.joinpath(pattern)
    path_strings = json.loads(
        self.call(
            [
                self.UTILITY_PYTHON,
                "-c",
                f"import sys, json, glob; json.dump(glob.glob({str(glob_pattern)!r}), sys.stdout)",
            ],
            capture_output=True,
        )
    )
    return [PurePosixPath(p) for p in path_strings]
def __updateNotebooks(self, currentReleasePath: PurePosixPath,
                      notebooks: List[Notebook], packagePath: PurePosixPath):
    for notebook in notebooks:
        targetPath = currentReleasePath.joinpath(notebook.databricksRelativePath)
        source = loadNotebook(notebook.path)

        try:
            self.__databricksNotebookConverter.validateSource(source)
        except UnexpectedSourceException:
            self.__logger.debug(f'Skipping unrecognized file {notebook.relativePath}')
            continue

        script = self.__databricksNotebookConverter.toWorkspaceImportNotebook(source, packagePath)
        self.__logger.info('Updating {}'.format(targetPath))
        self.__workspaceImporter.overwriteScript(script, targetPath)
class RemoteToPathFormatter:

    def __init__(self, local_root, remote_root):
        """
        local_root
            Path or string naming your local root directory, which will be replaced.
        remote_root
            PurePosixPath or string to put in place of the local root.
        """
        self.local_root = Path(local_root)
        self.remote_root = PurePosixPath(remote_root)

    def format(self, local_directory):
        """
        local_directory
            Directory whose local root folder is swapped for the remote one.
        """
        return self.remote_root.joinpath(
            PurePosixPath(Path(local_directory).relative_to(self.local_root)))
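# A minimal sketch of RemoteToPathFormatter in use on a POSIX host; the
# roots shown here are illustrative, not taken from the source.
fmt = RemoteToPathFormatter('/home/alice/project', '/srv/remote/project')
print(fmt.format('/home/alice/project/data/run1'))  # /srv/remote/project/data/run1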
def replace_package_in_test_project_with_local(package, rel_package_path, cpf_root_dir):
    """
    This function replaces a package in the CPF project situated at
    test_project_root_dir with the package of the same name in this repository.
    """
    fsa = filesystemaccess.FileSystemAccess()
    osa = miscosaccess.MiscOsAccess()

    this_root_dir = PurePosixPath(os.path.dirname(os.path.realpath(__file__)) + "/../..")
    this_package_dir = this_root_dir.joinpath('Sources/{0}'.format(package))
    test_project_package_dir = cpf_root_dir.joinpath(rel_package_path)

    # Move the .git file to a safe place while the package content is replaced.
    gitFilePath = test_project_package_dir / ".git"
    tempGitFilePath = test_project_package_dir / "../.git"
    fsa.move(gitFilePath, tempGitFilePath)

    # Delete the package in the test project, then copy the content of this package over.
    fsa.rmtree(test_project_package_dir)
    shutil.copytree(str(this_package_dir), str(test_project_package_dir))

    # Move the .git file back in place.
    fsa.remove(gitFilePath)
    fsa.move(tempGitFilePath, gitFilePath)

    # We also add and commit the changes to make sure the repository is not dirty,
    # which is expected after a "fresh" checkout. We have to call git add for the
    # case that we added files to the CPF project, which are not picked up by
    # git commit alone.
    osa.execute_command_output(
        'git add .',
        cwd=test_project_package_dir,
        print_output=miscosaccess.OutputMode.ON_ERROR
    )
    osa.execute_command_output(
        'git commit --allow-empty . -m "Set package content to local developer files."',
        cwd=test_project_package_dir,
        print_output=miscosaccess.OutputMode.ON_ERROR
    )
    osa.execute_command_output(
        'git commit --allow-empty . -m "Update {0}"'.format(package),
        cwd=cpf_root_dir,
        print_output=miscosaccess.OutputMode.ON_ERROR
    )
def __init__(
    self,
    *,
    filesystem: JsonableFilesystem,
    path: PurePosixPath,
    location: Optional[Point5D] = None,
    spatial_resolution: Optional[Tuple[int, int, int]] = None,
):
    with filesystem.openbin(path.joinpath("attributes.json").as_posix(), "r") as f:
        attributes_json = f.read().decode("utf8")
    self.attributes = N5DatasetAttributes.from_json_data(
        json.loads(attributes_json), location_override=location)

    super().__init__(
        c_axiskeys_on_disk=self.attributes.c_axiskeys,
        filesystem=filesystem,
        path=path,
        tile_shape=self.attributes.blockSize,
        interval=self.attributes.interval,
        dtype=self.attributes.dataType,
        spatial_resolution=spatial_resolution,
    )
def to_internal_value(self, raw):
    # Check if this is a base64 string
    if isinstance(raw, six.string_types):
        header = self.parser.match(raw)
        # Check if the base64 string is in the "data:" format
        if header:
            try:
                decoded_file = b64decode(self.parser.sub("", raw))
            except TypeError:
                self.fail("invalid_image")

            # Generate file name:
            p = PurePosixPath()
            uid = uuid4().bytes
            u = urlsafe_b64encode(uid).decode("ascii").rstrip("=")
            filename = p.joinpath(u).as_posix()
            raw = ContentFile(decoded_file, name=filename)

    return super(Base64FileField, self).to_internal_value(raw)
def get_logger(logger_name="common", level=DEBUG, logfile=None):
    global loggers
    if loggers.get(logger_name):
        return loggers.get(logger_name)

    logfile = logfile if logfile else PurePosixPath(LOG_DIR).joinpath(f"{logger_name}.log")
    logger = logging.getLogger(logger_name)
    logger.setLevel(level)
    formatter = logging.Formatter('%(asctime)s :%(levelname)s %(message)s')
    fh = logging.FileHandler(logfile)  # logfile is always resolved above
    fh.setFormatter(formatter)
    streamh = logging.StreamHandler()
    streamh.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(streamh)
    loggers[logger_name] = logger
    return logger
def __init__(self, project: Project):
    # ADD DOCSTRING
    # currently, cluster.connected is guaranteed to be True at this point
    cluster = project._cluster
    local_path = CLUSTERTOOLS_CONFIG_DIR.joinpath(project.name, 'project_config.ini')
    remote_home = PurePosixPath(cluster.getenv('HOME'))
    remote_path = remote_home.joinpath('.clustertools', project.name, 'project_config.ini')

    # needs to happen before BaseConfig._init_local is called
    self._config_update_hooks = ParrotDict()
    self._object_post_update_hooks = ParrotDict()
    self._object_validate_hooks = ParrotDict()
    for field, hook in PROJECT_CONFIG_UPDATE_HOOKS.items():
        self._config_update_hooks[field] = hook(self)
    for field, hook in PROJECT_OBJECT_POST_UPDATE_HOOKS.items():
        self._object_post_update_hooks[field] = hook(self)
    # also needs to happen in case self._init_local calls self._parse_config
    self._project = project
    super().__init__(cluster=cluster, local_path=local_path, remote_path=remote_path)
def prepareTestProject(repository, project, cpf_cmake_dir, cpf_buildscripts_dir, instantiating_test_module):
    """
    This method clones a given repository of a CPF test project into the test
    directory that is stored in the global BASE_TEST_DIR variable. After that it
    copies the current versions of CPFCMake and CPFBuildscripts into the test
    project, to make sure that the tests test the code that comes with this
    repository and not the versions that are included in the test projects.

    To save time, test projects are only cloned once for all tests.
    All source files in the test project should be left unchanged to prevent
    coupling of single tests. All generated files are deleted before each test,
    so they can be changed by test cases.

    The instantiating_test_module string is used to keep test-file directories
    for fixture instances that run in parallel apart.
    """
    print('[{0}] Prepare test-project: {1}'.format(instantiating_test_module, project))

    fsa = filesystemaccess.FileSystemAccess()
    osa = miscosaccess.MiscOsAccess()

    # clone fresh project
    root_parent_dir = PurePosixPath(BASE_TEST_DIR).joinpath(instantiating_test_module)
    cpf_root_dir = root_parent_dir.joinpath(project)
    if fsa.exists(cpf_root_dir):
        # we remove remaining testfiles at the beginning of a test, so we
        # have the project still available for debugging if the test fails.
        fsa.rmtree(cpf_root_dir)
    fsa.mkdirs(root_parent_dir)
    osa.execute_command_output('git clone --recursive {0}'.format(repository),
                               cwd=root_parent_dir,
                               print_output=miscosaccess.OutputMode.ON_ERROR)

    # Replace the CPFCMake and CPFBuildscripts packages in the test project with the ones
    # that are used by this repository. This makes sure that we test the versions that
    # are used here and not the ones that are set in the test project.
    replace_package_in_test_project_with_local('CPFCMake', cpf_cmake_dir, cpf_root_dir)
    replace_package_in_test_project_with_local('CPFBuildscripts', cpf_buildscripts_dir, cpf_root_dir)

    return cpf_root_dir
def connect(self,
            hostname: Optional[str] = None,
            username: Optional[str] = None,
            password: Optional[str] = None,
            use_key: bool = False,
            port: Optional[int] = None,
            timeout: int = 60,
            retries: int = 0,
            retry_delay: int = 1) -> None:
    # ADD DOCSTRING
    # TODO(?): after connecting, check all local project dirs exist
    #  on remote and are synced (checksums match)
    super().connect(hostname=hostname, username=username, password=password,
                    use_key=use_key, port=port, timeout=timeout,
                    retries=retries, retry_delay=retry_delay)
    remote_home = PurePosixPath(self.getenv('HOME'))
    self.config.remote_path = remote_home.joinpath('.clustertools/global_config.ini')
def decrypt_path(self, path: str) -> str:
    """Decrypt a path component by component

    :param path: encrypted path to decrypt
    :return: the raw-text decrypted path

    .. note:: This only supports PosixPath types for now.
    .. note:: Components that cannot be decrypted remain the same and are
              appended to the output path nonetheless.
    """
    # TODO: Add detection and support for windows paths
    decrypted_path = PurePosixPath()
    for path_comp in PurePosixPath(path).parts:
        try:
            path_comp = self.ftp_decrypt(path_comp)
        except (nacl.exceptions.CryptoError, IndexError,
                binascii.Error, ValueError):
            pass
        decrypted_path = decrypted_path.joinpath(path_comp)
    return str(decrypted_path)
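# A standalone sketch of the same accumulate-with-joinpath pattern, with a
# trivial stand-in for ftp_decrypt (the real method and its exception types
# come from the class above; the mapping here is purely illustrative).
from pathlib import PurePosixPath

def demo_decrypt(component: str) -> str:
    mapping = {'enc_home': 'home', 'enc_docs': 'docs'}
    return mapping[component]  # raises KeyError when not decryptable

rebuilt = PurePosixPath()
for comp in PurePosixPath('/enc_home/enc_docs/readme.txt').parts:
    try:
        comp = demo_decrypt(comp)
    except KeyError:
        pass  # keep the component as-is
    rebuilt = rebuilt.joinpath(comp)
print(rebuilt)  # /home/docs/readme.txt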
def getWorkspaceReleasePath(
        self, workspaceBaseDir: PurePosixPath) -> PurePosixPath:
    releaseDirName = (self.__dateTime.strftime('%Y-%m-%d_%H:%M:%S')
                      + '_' + self.__randomString)
    return workspaceBaseDir.joinpath(releaseDirName)
def getPackageUploadPathForCurrent(
        self, dbfsBasePath: PurePosixPath) -> PurePosixPath:
    return dbfsBasePath.joinpath(self.__packageName + '/_current').joinpath(
        self.getPackageFilename())
class ClusterRun:

    def __init__(self, array_run_obj, anat_file_path, physio_file_path, suffix=""):

        try:
            self.cluster_workspace = PurePosixPath(
                parameter_finder(array_run_obj.anatomy_df, 'cluster_workspace'))
        except NameError:
            raise ParameterNotFoundError(
                "cluster_workspace is not defined for running CxSystem on cluster")
        assert self.cluster_workspace.is_absolute(), \
            "cluster_workspace {} must be an absolute path with explicit [remote] home directory path".format(
                self.cluster_workspace.as_posix())

        try:
            self.cluster_address = parameter_finder(array_run_obj.anatomy_df, 'cluster_address')
        except NameError:
            raise ParameterNotFoundError(
                "cluster_address is not defined for running CxSystem on cluster")

        try:
            self.cluster_login_node = parameter_finder(array_run_obj.anatomy_df, 'cluster_login_node')
        except NameError:
            print(" - No cluster login node found. Directly connecting to cluster address {}"
                  .format(self.cluster_address))
            self.cluster_login_node = '--'

        # # the following call will check if the cluster is available or not, but it needs root access
        # self.ping_cluster()

        try:
            self.cluster_username = parameter_finder(array_run_obj.anatomy_df, 'cluster_username')
            assert self.cluster_username != 'username', \
                "Cluster username must be changed in the configuration file, currently it is the default value 'username'"
            print(" - Logging in with user '%s'" % self.cluster_username)
        except NameError:
            self.cluster_username = input(' - Enter cluster username: ')

        # NOTE: the source was masked here ('******'); the password lookup below is
        # reconstructed from the surrounding try/except structure and the
        # parameter_finder pattern used above.
        try:
            self.password = parameter_finder(array_run_obj.anatomy_df, 'password')
        except NameError:
            if 'CLUSTERPASS' in os.environ.keys():
                self.password = os.environ['CLUSTERPASS']
            else:
                self.password = getpass.getpass(
                    ' - Enter password for user {}: '.format(self.cluster_username))

        self.suffix = suffix
        print(" - temp file suffix is %s" % self.suffix)

        self.client = paramiko.SSHClient()
        self.client.load_system_host_keys()
        self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        if self.cluster_login_node != '--':
            print(" - Connecting to login node {}".format(self.cluster_login_node))
            sock = paramiko.ProxyCommand("ssh {}@{} nc {} 22".format(
                self.cluster_username, self.cluster_login_node, self.cluster_address))
            sock.settimeout(30)
            self.client.connect(self.cluster_address,
                                port=22,
                                username=self.cluster_username,
                                password=self.password,
                                sock=sock)
        else:
            self.client.connect(self.cluster_address,
                                port=22,
                                username=self.cluster_username,
                                password=self.password)
        print(" - Connected to %s" % self.cluster_address)
        print(" - Creating workspace folder if not exists")
        self.ssh_commander('mkdir -p {}'.format(self.cluster_workspace.as_posix()))
        scp = SCPClient(self.client.get_transport())

        if 'json' in anat_file_path.suffix.lower():
            converter = fileconverter.ConfigConverter(anat_file_path.as_posix())
            anat_file_path = Path(converter.save_as_csv(overwrite=True))
        if 'json' in physio_file_path.suffix.lower():
            converter = fileconverter.ConfigConverter(physio_file_path.as_posix())
            physio_file_path = Path(converter.save_as_csv(overwrite=True))

        print(" - Transferring configuration files ...")
        self.remote_anat_filename = '_tmp_anat_config{}.csv'.format(self.suffix)
        self.remote_phys_filename = '_tmp_physio_config{}.csv'.format(self.suffix)
        self.local_workspace_unexpanded = Path(
            parameter_finder(array_run_obj.anatomy_df, 'workspace_path'))
        self.local_workspace = Path(
            parameter_finder(array_run_obj.anatomy_df, 'workspace_path')).expanduser()
        self.local_cluster_folder = self.local_workspace.joinpath('cluster_run' + self.suffix)
        if not self.local_cluster_folder.is_dir():
            os.mkdir(self.local_cluster_folder.as_posix())
        try:
            imported_connections_file = Path(
                parameter_finder(array_run_obj.anatomy_df, 'import_connections_from'))
            if imported_connections_file.is_file():
                scp.put(imported_connections_file.as_posix(),
                        self.cluster_workspace.as_posix())
                new_path = Path('./').joinpath(imported_connections_file.name).as_posix()
                change_parameter_value_in_file(
                    anat_file_path.as_posix(),
                    self.local_cluster_folder.joinpath(anat_file_path.name),
                    'import_connections_from', new_path)
                anat_file_path = self.local_cluster_folder.joinpath(anat_file_path.name)
        except TypeError:  # this is when the value is # or -- for instance
            pass

        scp.put(anat_file_path.as_posix(),
                self.cluster_workspace.joinpath(self.remote_anat_filename).as_posix())
        scp.put(physio_file_path.as_posix(),
                self.cluster_workspace.joinpath(self.remote_phys_filename).as_posix())

        # ask user to set the number of nodes, time and memory:
        print(" - Please check the default csc_puhti.job file and set the time, memory and uncomment and enter email address if you wish."
              "\nNote that the number of nodes in default slurm file should always be set to 1."
              " Instead you should enter the number of nodes in the CxSystem network config file. "
              "\nAlso the default number of CPUs=16 does not need to be changed most of the times. ")

        self.slurm_file_path = Path(
            parameter_finder(array_run_obj.anatomy_df, 'cluster_job_file_path')).expanduser()
        if not self.slurm_file_path.is_file():
            if not self.slurm_file_path.is_absolute():
                raise RelativePathError(
                    "\nSlurm file {} not found in local workspace. Make sure the path to the file is "
                    "absolute".format(self.slurm_file_path.as_posix()))
            else:
                raise FileNotFoundError("\nSlurm file {} not found".format(
                    self.slurm_file_path.as_posix()))

        # updating remote cxsystem2
        self.update_remote_cxsystem2(self.slurm_file_path, self.cluster_workspace)

        # building slurm:
        for item_idx, item in enumerate(array_run_obj.clipping_indices):
            with open(self.slurm_file_path.as_posix(), 'r') as sl1:
                remote_slurm_filename = "_tmp_slurm{}_part{}.job".format(self.suffix, item_idx)
                with open(self.local_cluster_folder.joinpath(remote_slurm_filename).as_posix(),
                          'w') as sl2:  # wb -> w
                    for line in sl1:
                        sl2.write(line)
                    try:
                        sl2.write(
                            "python -c "
                            "\"from cxsystem2.core.cxsystem import CxSystem as cxs; "
                            " cx = cxs('{anatomy}','{physio}', cluster_run_start_idx={cluster_start},cluster_run_step={cluster_step}); "
                            "cx.run()\"".format(
                                anatomy=self.remote_anat_filename,
                                physio=self.remote_phys_filename,
                                cluster_start=item,
                                cluster_step=array_run_obj.clipping_indices[item_idx + 1]
                                - array_run_obj.clipping_indices[item_idx]))
                    except IndexError:
                        sl2.write(
                            "python -c "
                            "\"from cxsystem2.core.cxsystem import CxSystem as cxs; "
                            " cx = cxs('{anatomy}','{physio}', cluster_run_start_idx={cluster_start},cluster_run_step={cluster_step}); "
                            "cx.run()\"".format(
                                anatomy=self.remote_anat_filename,
                                physio=self.remote_phys_filename,
                                cluster_start=item,
                                cluster_step=array_run_obj.total_configs
                                - array_run_obj.clipping_indices[item_idx]))
            scp.put(self.local_cluster_folder.joinpath(remote_slurm_filename).as_posix(),
                    self.cluster_workspace.joinpath(remote_slurm_filename).as_posix())
        print(" - Slurm file generated and copied to cluster")

        self.channel = self.client.invoke_shell()
        for item_idx, item in enumerate(array_run_obj.clipping_indices):
            remote_slurm_filename = "_tmp_slurm{}_part{}.job".format(self.suffix, item_idx)
            if platform == 'win32':
                print(" - Converting the file using dos2unix")
                self.channel.send('cd {} && dos2unix {}\n'.format(
                    self.cluster_workspace.as_posix(), remote_slurm_filename))
                time.sleep(1)
            self.channel.send('cd {} && sbatch {}\n'.format(
                self.cluster_workspace.as_posix(), remote_slurm_filename))
            print(" - Job file {} submitted".format(remote_slurm_filename))
            time.sleep(1)

        cluster_metadata = {
            'cluster_address': self.cluster_address,
            'cluster_login_node': self.cluster_login_node,
            'cluster_username': self.cluster_username,
            'local_workspace_unexpanded': self.local_workspace_unexpanded.as_posix(),
            'local_workspace': self.local_workspace.as_posix(),
            'local_cluster_run_folder': self.local_cluster_folder.as_posix(),
            'local_cluster_run_download_folder': self.local_cluster_folder.joinpath('downloads'),
            'cluster_workspace': self.cluster_workspace.as_posix(),
            'cluster_simulation_folder': self.cluster_workspace.joinpath(
                parameter_finder(array_run_obj.anatomy_df, 'simulation_title')).as_posix(),
            'suffix': self.suffix,
            'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-7],
        }
        with open(self.local_cluster_folder.joinpath(
                'cluster_metadata{}.pkl'.format(self.suffix)), 'wb') as ff:
            pickle.dump(cluster_metadata, ff)
        print(" - Cluster metadata saved. To download the result and clean the environments after getting the email,"
              " run the following command in the terminal:\n")
        print("cxcluster " + self.local_cluster_folder.joinpath(
            'cluster_metadata{}.pkl'.format(self.suffix)).as_posix())

    def ping_cluster(self):
        try:  # check if the cluster address is ip or hostname
            socket.inet_aton(self.cluster_address)
            cluster_ip = self.cluster_address
        except OSError:
            cluster_ip = socket.gethostbyname(self.cluster_address)
        p = ping(cluster_ip, timeout=3)
        if not p:
            raise ClusterNotReachableError("Cluster node is not reachable")

    def ssh_commander(self, command, print_flag=False):
        stdin, stdout, stderr = self.client.exec_command(command, get_pty=True)
        out = stdout.read()
        if print_flag is True:
            print(out)
        return out

    def update_remote_cxsystem2(self, slurm_path, remote_workspace):
        slurm_path = Path(slurm_path)
        remote_workspace = Path(remote_workspace)
        module_name = self.find_remote_python_module(slurm_path)
        self.ssh_commander('mkdir -p {}'.format(self.cluster_workspace.as_posix()))
        # Query for valid cxsystem, install/update if necessary, report
        print(" - Checking CxSystem2 on cluster")
        # Should be empty string for existing git repo
        git_repo_error_message = self.ssh_commander(
            'source ~/.bash_profile ; '
            'source ~/.bashrc ; '
            'cd {workspace} ; '
            'cd CxSystem2 ; '
            'git -C . rev-parse'.format(
                workspace=remote_workspace.as_posix())).decode('utf-8')
        if not git_repo_error_message:
            git_basename = self.ssh_commander(
                'cd {workspace}/CxSystem2 ; '
                'git rev-parse --show-toplevel'.format(
                    workspace=remote_workspace.as_posix())).decode('utf-8')
            commit_HEAD_hash = self.ssh_commander(
                'source ~/.bash_profile ; '
                'source ~/.bashrc ; '
                'cd {workspace} ; '
                'cd CxSystem2 ; '
                'git rev-parse --short HEAD'.format(
                    workspace=remote_workspace.as_posix())).decode('utf-8')
            git_branch = self.ssh_commander(
                'source ~/.bash_profile ; '
                'source ~/.bashrc ; '
                'cd {workspace} ; '
                'cd CxSystem2 ; '
                'git rev-parse --abbrev-ref HEAD'.format(
                    workspace=remote_workspace.as_posix())).decode('utf-8')
            print(f" - The git repo is {git_basename}, branch is {git_branch}, "
                  f"commit HEAD hash is {commit_HEAD_hash}")
            print(" - No need to download/install")
        else:
            print(" - Updating CxSystem2 on cluster")
            print(self.ssh_commander(
                'source ~/.bash_profile ; '
                'source ~/.bashrc ; '
                'cd {workspace} ; '
                'git clone https://github.com/VisualNeuroscience-UH/CxSystem2 ; '
                'cd CxSystem2 ; '
                'git pull ; '.format(workspace=remote_workspace.as_posix())).decode('utf-8'))
            print(self.ssh_commander(
                'bash -lc \''
                'source ~/.bash_profile ; '
                'source ~/.bashrc ; '
                'echo $PATH; '
                'module load {module} ;'
                'cd {cxfolder} ; '
                'python -m pip install -Ue . --user\''.format(
                    module=module_name,
                    cxfolder=remote_workspace.joinpath('CxSystem2').as_posix())).decode('utf-8'))

    def find_remote_python_module(self, slurm_path):
        module_name = ''
        slurm_path = Path(slurm_path)
        with open(slurm_path.as_posix()) as f:
            for line in f:
                if 'module load' in line.lower() and 'python' in line.lower():
                    module_name = line.split(' ')[-1].strip('\n')
                    print(" - Remote module name is {}".format(module_name))
        return module_name
class SCPSync:

    class ConfigError(RuntimeError):
        ...

    hashers = {
        'sha1': hashlib.sha1,
        'sha256': hashlib.sha256,
        'sha512': hashlib.sha512,
        'md5': hashlib.md5
    }
    hashtype = 'sha1'

    ignore_fn = '.syncignore'
    config_fn = '.syncconfig'
    global_filters = ['*' + ignore_fn, '*' + config_fn]

    diffcolors = {
        '+': colorama.Fore.GREEN,
        '-': colorama.Fore.RED,
        '@': colorama.Fore.BLUE
    }
    diffcolor_standard = colorama.Fore.BLACK
    diffcolor_newfile = colorama.Fore.YELLOW
    diffcolor_binfile = colorama.Fore.MAGENTA

    def __init__(self, local_path='./', target_path=None, host=None, port=None,
                 user=None, password=None, config_name=None):
        self.local_path = Path(local_path).resolve()
        try:
            self.config = self.read_config(config_name)
        except (FileNotFoundError, KeyError):
            self.config = {}
        self.config['files'] = {
            self.local_path.joinpath(dest).resolve():
            self.local_path.joinpath(source).resolve()
            for dest, source in self.config.get('files', {}).items()
        }
        self.config['port'] = self.config.get('port') or 22
        for key in ['host', 'port', 'user', 'password', 'target_path']:
            value = locals()[key]
            if value is not None:
                self.config[key] = value
            elif key not in self.config:
                raise SCPSync.ConfigError(
                    f'{key} not specified and not in config file!')

        self._ssh = paramiko.SSHClient()
        self._ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        self._ssh.connect(self.config['host'], self.config['port'],
                          self.config['user'], self.config['password'])
        self._scp = paramiko_scp.SCPClient(self._ssh.get_transport())
        self._scpfile = SCPFile(self._ssh)

        self.remote_path = PurePosixPath(self.config['target_path'])
        self.name = self._random_name(10)  # Random name to use for this session
        self._remote_hasher_file = f'/tmp/scphash_{self.name}.py'
        self._remote_hashes_file = f'/tmp/scphash_{self.name}.pkl'
        self._remote_file_prefix = f'/tmp/scphash_{self.name}'

        self.filters = {}  # type: Dict[Path, List[str]]
        self._remote_hasher_thread = None  # type: Optional[Tuple[ChannelStdinFile, ChannelFile, ChannelStderrFile]]

        self.logger = logging.getLogger('SCPSync')

    def sync(self):
        # start remote hasher
        self.logger.info('Starting syncing')
        self.exec_config_command('sync_start')
        # self.start_remote_hasher()
        self.logger.info('Remote hasher started')

        # hash local files
        self.build_filters_list()
        self.push_remote_hasher()
        files = self.recursive_filelist(self.local_path)
        local_hashes = {file: self.hash_file(file) for file in files}
        for file in self.config['files'].values():
            if file not in local_hashes:
                local_hashes[file] = self.hash_file(file)
        self.logger.info('Local hashes calculated')

        # get remote hashes
        self.join_remote_hasher()
        self.logger.info('Remote hashes calculated')
        remote_hashes = self.pull_remote_hashes()
        self.logger.info('Remote hashes pulled')
        self.exec_config_command('post_hash')

        self.logger.info('Comparing files')
        files_changed = []  # type: List[Tuple[Path, PurePosixPath]]
        for file in files:
            local_file = self.config['files'].get(file, file)
            local_hash = local_hashes[local_file]
            remote_file = file.relative_to(self.local_path).as_posix()
            remote_hash = remote_hashes.get(remote_file)
            if local_hash != remote_hash:
                remote_path = self.remote_path.joinpath(remote_file)
                files_changed.append((local_file, remote_path))
                file_basename, file_ext = os.path.splitext(local_file)
                if 'diff_extensions' in self.config and file_ext in self.config['diff_extensions']:
                    self.print_diff(local_file, remote_path)

        if self.config.get('diff_only', False):
            return

        self.logger.info(f'Pushing {len(files_changed)} file(s)')
        self.push_files(files_changed)
        self.exec_config_command('sync_end')
        self.logger.info('Done syncing')

    def print_diff(self, local_file, remote_file):
        try:
            # Try to get the path relative to the root
            local_relpath = local_file.relative_to(self.local_path).as_posix()
            remote_relpath = remote_file.relative_to(self.remote_path).as_posix()
        except ValueError:
            # If that fails, use the absolute paths
            local_relpath = local_file.as_posix()
            remote_relpath = remote_file.as_posix()
        try:
            with open(local_file, 'rt') as b, \
                    self._scpfile(remote_file.as_posix(), 'rt') as a:
                a_txt = a.readlines()
                b_txt = b.readlines()
                for i, line in enumerate(
                        difflib.unified_diff(a_txt, b_txt,
                                             'remote::' + remote_relpath,
                                             'local::' + local_relpath)):
                    style = colorama.Style.BRIGHT if i < 2 else colorama.Style.NORMAL
                    line = f'{style}{self.diffcolors.get(line[0], self.diffcolor_standard)}{line.rstrip()}{colorama.Style.RESET_ALL}'
                    self.logger.info(line)
                self.logger.info('')
        except FileNotFoundError:
            self.logger.info(
                f'{self.diffcolor_newfile}New file: {local_relpath}{colorama.Style.RESET_ALL}')
        except UnicodeDecodeError:
            self.logger.info(
                f'{self.diffcolor_binfile}Binary file changed: {local_relpath}{colorama.Style.RESET_ALL}')

    def hash_file(self, file: Path, hasher_type=None) -> str:
        if hasher_type is None:
            hasher_type = self.hashers[self.hashtype]
        hasher = hasher_type()
        with file.open('rb') as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hasher.update(chunk)
        return hasher.hexdigest()

    ###
    # Remote hashes stuff

    def push_remote_hasher(self):
        remote_filters = {}
        for path, filters in self.filters.items():
            remote_path = self.remote_path.joinpath(
                path.relative_to(self.local_path).as_posix())
            remote_filters[remote_path.as_posix()] = filters
        filters_file = io.BytesIO()
        pickle.dump(remote_filters, filters_file)
        filters_file.seek(0)
        self._scp.putfo(filters_file, f'{self._remote_file_prefix}.pkl')
        self._scp.put(
            os.path.join(os.path.dirname(__file__), 'remote_hasher.py'),
            f'{self._remote_file_prefix}.py')
        self._remote_hasher_thread = self._ssh.exec_command(
            f'python3 {self._remote_hasher_file} --hash {self.hashtype} {self.remote_path} {self._remote_hashes_file}')
        # remote_filters = {path.relative_to(self.local_path): filters for (path, filters) in self.filters.items()}

    def start_remote_hasher(self) -> None:
        self._scp.put(
            os.path.join(os.path.dirname(__file__), 'remote_hasher.py'),
            self._remote_hasher_file)
        self._remote_hasher_thread = self._ssh.exec_command(
            f'python3 {self._remote_hasher_file} --hash {self.hashtype} {self.remote_path} {self._remote_hashes_file}')

    def join_remote_hasher(self) -> int:
        assert self._remote_hasher_thread is not None, 'Remote hasher not started'
        stdin, stdout, stderr = self._remote_hasher_thread
        return stdout.channel.recv_exit_status()

    def pull_remote_hashes(self) -> Dict[str, str]:
        with self._scpfile(self._remote_hashes_file, 'rb') as f:
            hashes = pickle.load(f)
        self.exec_ssh_command(
            f'rm {self._remote_hasher_file} {self._remote_hashes_file}')
        return hashes

    def exec_ssh_command(self, command: str) -> Tuple[int, bytes, bytes]:
        stdin, stdout, stderr = self._ssh.exec_command(command)
        stdout_reader = StreamReader(stdout)
        stderr_reader = StreamReader(stderr)
        return (stdout.channel.recv_exit_status(),
                stdout_reader.join(), stderr_reader.join())

    def exec_config_command(self, group: str):
        commands = self.config.get('commands', {}).get(group)
        if commands:
            if isinstance(commands, str):
                commands = (commands,)
            for command in commands:
                self.logger.info(f'Executing {group} command: "{command}"')
                self.exec_ssh_command(command)

    def push_files(self, files: List[Tuple[Path, PurePosixPath]]):
        for source, dest in files:
            self.logger.info(f'Copying {source} to {dest}')
            self._scp.put(source, dest.as_posix())

    @staticmethod
    def _random_name(length: int) -> str:
        return ''.join(
            [chr(random.randint(ord('a'), ord('z'))) for i in range(length)])

    def recursive_filelist(self, root: Path) -> List[Path]:
        """Recursively search the directory 'root' for files that do not match
        any rules in .syncignore files"""
        files = []
        filters = self.get_applicable_filters(root)
        for fn in root.glob('*'):
            for filter_path, rules in filters.items():
                fpath = fn.relative_to(filter_path).as_posix()
                if fn.is_dir():
                    fpath += '/'
                if any(fnmatch.fnmatch(fpath, rule) for rule in rules):
                    break
            else:
                if fn.is_dir():
                    files.extend(self.recursive_filelist(fn))
                else:
                    files.append(fn)
        return files

    def get_applicable_filters(self, root: Path) -> Dict[Path, List[str]]:
        filters = {
            path: rules
            for path, rules in self.filters.items()
            if self.common_path(path, root) == path
        }
        filters[self.local_path] = filters.get(self.local_path, []) + self.global_filters
        return filters

    def build_filters_list(self):
        self.filters = {}
        for filterfile in self.local_path.rglob(self.ignore_fn):
            with filterfile.open('rt') as f:
                rules = [
                    line for line in f.read().splitlines()
                    if line if not line.startswith('# ')
                ]
            self.filters[filterfile.parent] = rules

    @staticmethod
    def common_path(*paths: Path):
        path_parts = [p.parts for p in paths]
        common_parts = []
        for parts in zip(*path_parts):
            if len(set(parts)) != 1:
                break
            common_parts.append(parts[0])
        return Path('/'.join(common_parts))

    def read_config(self, name: str):
        return self.read_config_file(self.local_path, name)

    @classmethod
    def read_config_file(cls, file: Union[str, Path], name: str):
        file = Path(file)
        if file.is_dir():
            file = file.joinpath(cls.config_fn)
        with open(file, 'rt') as f:
            config = yaml.load(f, Loader=yaml.SafeLoader)
        return cls.extract_config(config, name)

    @classmethod
    def extract_config(cls, config: Dict, name: str) -> dict:
        data = config[name]
        if 'inherit' in data:
            base = cls.extract_config(config, data['inherit'])
            base.update(data)
            data = base
        return data
class DataSubCollection:  # (Sequence):
    """Class that represents one part of the data.

    Indexing this class returns a dict with all the parameters and data sets.

    :param file: file object of the data
    :param data_path: which part of the data to read.

    .. autoinstanceattribute:: parameters
        :annotation: Dictionary of the parameters (attributes of the hdf5 group).

    .. autoinstanceattribute:: DATASET_NAME
        :annotation: All data sets are also accessible via attributes with
            their name. The underlying `h5py Dataset
            <http://docs.h5py.org/en/latest/high/dataset.html>`_ is returned.
    """

    def __init__(self, file, data_path):
        self._file = file
        self._data_path = PurePosixPath(data_path)
        self._entry = self._file.require_group(str(self._data_path))
        self._getitem_callbacks = []

        self._dsets = []
        for i in self._entry.values():
            if isinstance(i, h5py.Dataset):
                self._dsets.append(os.path.basename(i.name))

        self.parameters = dict()
        for k in self._entry.attrs.keys():
            self.parameters[k] = self._entry.attrs[k]

        if self._dsets:
            self._len = self._file[str(self._data_path.joinpath(self._dsets[0]))].shape[0]
        else:
            self._len = 0

        for dset in self._dsets:
            setattr(self, dset, self._file[str(self._data_path.joinpath(dset))])

    def __len__(self):
        return self._len

    def add_getitem_callback(self, callback):
        """add a callback to change the parameters dict for each item

        :param callback: callback with signature ``callback(item, parameters)``.
            The callback should return a new parameters dict for the current item.
        """
        self._getitem_callbacks.append(callback)

    def __getitem__(self, item):
        ret = dict()
        ret.update(self.parameters)
        for callback in self._getitem_callbacks:
            ret = callback(item, ret)
        for dset in self._dsets:
            logger.debug("reading %s" % (dset.__repr__()))
            dshape = len(self._entry[dset].shape)
            if dshape == 1 or dshape == 3:
                # time series of single values or images
                logger.debug("reading time series data")
                ret[dset] = self._entry[dset][item]
            elif dshape == 2:
                # single image
                logger.debug("reading single image")
                ret[dset] = self._entry[dset][()]  # Dataset.value was removed in h5py 3
            else:
                raise ValueError(
                    'Dimension of data set is too large, expected 1,2,3 got %d' % dshape)
        return ret

    def __iter__(self):
        return (self[i] for i in range(len(self)))
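# A hedged usage sketch of DataSubCollection against an in-memory HDF5 file
# (h5py's "core" driver, no file written to disk). The group/dataset names
# are illustrative, and the class above expects a module-level `logger`, so
# one is provided here.
import logging
import h5py
import numpy as np

logger = logging.getLogger(__name__)

f = h5py.File("demo.h5", "w", driver="core", backing_store=False)
grp = f.require_group("run1")
grp.attrs["temperature"] = 300
grp.create_dataset("signal", data=np.arange(12.0).reshape(4, 3, 1))  # 3D: time series

coll = DataSubCollection(f, "run1")
print(len(coll))               # 4 (first axis of the first dataset)
print(coll[0]["temperature"])  # 300
print(coll[0]["signal"].shape) # (3, 1)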
def relative_path(
    current: PurePosixPath,
    target: PurePosixPath,
    static_prefix: Optional[PurePosixPath] = None,
) -> PurePosixPath:
    """Calculate a dotted path from a source to destination.

    Relative paths are hard. Lots of edge cases, lots of configurable
    policies. This function is the innermost logic, which presumes lots of
    complexity is handled before stuff gets passed in.

    Themester's logic is based on Python's ``PurePosixPath``: a virtual
    hierarchy that is sort of like the filesystem, but not actually tied to
    a filesystem. References to documents in the site and static assets are
    done as these virtual pure paths. Static asset references are
    "normalized" at definition time to be relative to a configurable site
    root.

    Both ``current`` and ``target`` are expected to start with a slash. It
    doesn't matter whether they end with a slash.

    This function doesn't care about whether folders should get ``/index``
    added to their path. In fact, it doesn't understand folders. It expects
    the path to include ``index`` when current or target is a collection of
    some kind.

    Policies handled before this is called:

    - Adding '/index' to current/target if it is a collection
    - Adding a configurable suffix such as ``index.html``
    - Converting a resource to a path
    - Detecting a resource is a collection and should get ``index`` added to path

    Args:
        current: Source from which target is relative, with leading slash
        target: Destination, with leading slash
        static_prefix: Path to insert between dots and target

    Returns:
        The path to the target.

    Raises:
        ValueError: Trying to get an invalid path.
    """
    if not current.is_absolute():
        m = f'Source path "{str(current)}" must start with a slash'
        raise ValueError(m)
    if static_prefix is None and not target.is_absolute():
        m = f'Target path "{str(target)}" must start with a slash'
        raise ValueError(m)

    # Do an optimization... bail out immediately if the same, but make
    # it relative
    if current == target:
        return PurePosixPath(current.name)

    # noinspection PyTypeChecker
    current_parents = iter(current.parents)
    target_parents = target.parents
    result: Optional[PurePosixPath] = None
    hops = -1
    while True:
        try:
            result = next(current_parents)
            hops += 1
            if result in target_parents:
                raise StopIteration()
        except StopIteration:
            break

    # What is the "leftover" part of target
    remainder_parts = target.relative_to(str(result))

    # How many hops up to go
    prefix = PurePosixPath("/".join(repeat("..", hops)))

    # Join it all together
    if static_prefix is None:
        v = prefix.joinpath(remainder_parts)
    else:
        v = prefix.joinpath(static_prefix, remainder_parts)
    return v
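# A quick sanity check of relative_path (the site paths are illustrative;
# `repeat` comes from itertools and Optional from typing, as the function
# above assumes).
from pathlib import PurePosixPath

print(relative_path(PurePosixPath('/docs/guide/install'),
                    PurePosixPath('/docs/api/index')))
# ../api/index  (up one level from /docs/guide, then down into api/)
print(relative_path(PurePosixPath('/index'), PurePosixPath('/about')))
# about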
async def store_response_into_filebrowserobj(operation, task, response):
    # for the current message, see what the parent_path value is for that host
    # we want to link-up any new/updated objects to their parents
    if response["name"] is None or response["name"] == "":
        return {"status": "success"}
    parent = await create_and_check_parents(operation, task, response)
    if parent is None and response["parent_path"] != "":
        return {
            "status": "error",
            "error": "Failed to parse and handle file browser objects",
        }
    if "host" not in response or response["host"] == "" or response["host"] is None:
        response["host"] = task.callback.host
    # now that we have the immediate parent and all parent hierarchy created,
    # deal with the current obj and sub objects
    try:
        if (
            response["name"] == "/"
            or len(response["parent_path"]) > 0
            and response["parent_path"][0] == "/"
        ):
            parent_path = PurePosixPath(response["parent_path"])
            blank_root = PurePosixPath("")
        else:
            parent_path = PureWindowsPath(response["parent_path"])
            blank_root = PureWindowsPath("")
        query = await db_model.filebrowserobj_query()
        parent_path_str = str(parent_path) if not parent_path == blank_root else ""
        try:
            filebrowserobj = await db_objects.get(
                query,
                operation=operation,
                host=response["host"].encode("unicode-escape"),
                name=response["name"].encode("unicode-escape"),
                is_file=response["is_file"],
                parent=parent,
                parent_path=str(parent_path_str).encode("unicode-escape"),
            )
            filebrowserobj.task = task
            filebrowserobj.permissions = js.dumps(response["permissions"]).encode(
                "unicode-escape"
            )
            filebrowserobj.access_time = response["access_time"].encode(
                "unicode-escape"
            )
            filebrowserobj.modify_time = response["modify_time"].encode(
                "unicode-escape"
            )
            filebrowserobj.size = str(response["size"]).encode("unicode_escape")
            filebrowserobj.success = response["success"]
            filebrowserobj.deleted = False
            await db_objects.update(filebrowserobj)
        except Exception as e:
            filebrowserobj = await db_objects.create(
                db_model.FileBrowserObj,
                task=task,
                operation=operation,
                host=response["host"].encode("unicode-escape"),
                name=response["name"].encode("unicode-escape"),
                permissions=js.dumps(response["permissions"]).encode("unicode-escape"),
                parent=parent,
                parent_path=str(parent_path_str).encode("unicode-escape"),
                full_path=str(parent_path / response["name"]).encode("unicode_escape"),
                access_time=response["access_time"].encode("unicode-escape"),
                modify_time=response["modify_time"].encode("unicode-escape"),
                is_file=response["is_file"],
                size=str(response["size"]).encode("unicode-escape"),
                success=response["success"],
            )
        if (
            not filebrowserobj.is_file
            and "files" in response
            and response["files"] is not None
            and len(response["files"]) > 0
        ):
            # iterate over the files and create their objects
            parent_path = parent_path.joinpath(response["name"])
            for f in response["files"]:
                try:
                    newfileobj = await db_objects.get(
                        query,
                        operation=operation,
                        host=response["host"].encode("unicode-escape"),
                        name=f["name"].encode("unicode-escape"),
                        is_file=f["is_file"],
                        parent=filebrowserobj,
                        parent_path=str(parent_path).encode("unicode-escape"),
                    )
                    newfileobj.task = task
                    newfileobj.permissions = js.dumps(f["permissions"]).encode(
                        "unicode-escape"
                    )
                    newfileobj.access_time = f["access_time"].encode("unicode-escape")
                    newfileobj.modify_time = f["modify_time"].encode("unicode-escape")
                    newfileobj.size = str(f["size"]).encode("unicode_escape")
                    newfileobj.deleted = False
                    await db_objects.update(newfileobj)
                except Exception as e:
                    await db_objects.create(
                        db_model.FileBrowserObj,
                        task=task,
                        operation=operation,
                        host=response["host"].encode("unicode-escape"),
                        parent=filebrowserobj,
                        permissions=js.dumps(f["permissions"]).encode("unicode-escape"),
                        parent_path=str(parent_path).encode("unicode-escape"),
                        access_time=f["access_time"].encode("unicode-escape"),
                        modify_time=f["modify_time"].encode("unicode-escape"),
                        size=str(f["size"]).encode("unicode-escape"),
                        is_file=f["is_file"],
                        name=f["name"].encode("unicode-escape"),
                        full_path=str(parent_path / f["name"]).encode("unicode-escape"),
                    )
        return {"status": "success"}
    except Exception as e:
        print(sys.exc_info()[-1].tb_lineno)
        print("file_browser_api.py: " + str(e))
        return {"status": "error", "error": str(e)}
watch_dir = "templates" # paraview的默认python的路径 PVPYTHON_PATH = "/data/ParaView-5.6.0-osmesa-MPI-Linux-64bit/bin/pvpython" # 家目录 创建其他用户的家目录 HOMES_PATH = "/home" # sudo密码 SUDO_PW = "09170725" # Linux用户的默认密码 DEFAULT_USER_PW = "lmtuser666" # 日志模块的配置 LOG_DIR = PurePosixPath.joinpath(BASE_DIR, "logs") # 日志记录的方式 # 可选方式 logging和sentry LOG_SYS = "logging" # 如果日志目录不存在则创建 if not Path(LOG_DIR).exists(): Path(LOG_DIR).mkdir() # 一些模板文件的目录 TEMPLATES_DIR = Path(BASE_DIR).joinpath("templates") # 脚本的目录 SCRIPTS_PATH = Path(TEMPLATES_DIR).joinpath("scripts")