def complete_file_input_values(input_key, input_value):
    """
    Completes the information inside a given file input value. Will alter the given input_value.
    Creates the following keys (if not already present): path, basename, dirname, nameroot, nameext

    :param input_key: An input key as string
    :param input_value: An input value with class 'File'
    """
    # define basename
    if 'basename' in input_value:
        basename = PurePosixPath(input_value['basename'])
    else:
        basename = PurePosixPath(input_key)
    input_value['basename'] = basename.as_posix()

    # define dirname
    if 'dirname' in input_value:
        dirname = PurePosixPath(input_value['dirname'])
    else:
        dirname = default_inputs_dirname()
    input_value['dirname'] = dirname.as_posix()

    # define nameroot, nameext
    # noinspection PyTypeChecker
    nameroot, nameext = os.path.splitext(basename.as_posix())
    input_value['nameroot'] = nameroot
    input_value['nameext'] = nameext

    # define path
    input_value['path'] = (dirname / basename).as_posix()
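# A minimal usage sketch for the function above. It assumes `default_inputs_dirname()`
# (not shown here) returns the staging directory for inputs; the stand-in below is hypothetical.
import os
from pathlib import PurePosixPath

def default_inputs_dirname() -> PurePosixPath:
    # hypothetical stand-in for the real helper
    return PurePosixPath("/inputs")

input_value = {"class": "File", "basename": "reads.fastq"}
complete_file_input_values("sample", input_value)
# input_value now also contains:
#   dirname='/inputs', nameroot='reads', nameext='.fastq', path='/inputs/reads.fastq'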
def tryLoad(cls, filesystem: JsonableFilesystem, path: PurePosixPath) -> "PrecomputedChunksInfo | Exception":
    url = filesystem.geturl(path.as_posix())
    if not filesystem.exists(path.as_posix()):
        return FileNotFoundError(f"Could not find info file at {url}")
    with filesystem.openbin(path.as_posix(), "r") as f:
        try:
            info_json = f.read().decode("utf8")
            return PrecomputedChunksInfo.from_json_value(json.loads(info_json))
        except Exception:
            return ValueError(f"Could not interpret json info file at {url}")
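# tryLoad returns errors as values rather than raising. A hedged calling sketch,
# assuming the method is bound as a classmethod on PrecomputedChunksInfo and that
# `fs` is a JsonableFilesystem instance; the info path is an example.
info = PrecomputedChunksInfo.tryLoad(filesystem=fs, path=PurePosixPath("my_dataset/info"))
if isinstance(info, Exception):
    raise info  # or report it; the caller decides how to surface the failure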
class AutoEncoderDataSet(AbstractDataSet):
    def __init__(
        self,
        filepath: str,
        load_args: Optional[Dict[str, Any]] = None,
        save_args: Optional[Dict[str, Any]] = None,
    ) -> None:
        self._filepath = PurePosixPath(filepath)
        self._load_args = load_args if load_args else {}
        self._save_args = save_args if save_args else {}

    def _load(self) -> AutoEncoder:
        state_dict = torch.load(str(self._filepath))
        input_shape = state_dict["encoder_hidden_layer.weight"].shape[1]
        autoencoder = AutoEncoder(input_shape=input_shape)
        autoencoder.load_state_dict(state_dict)
        return autoencoder

    def _save(self, autoencoder: AutoEncoder) -> None:
        torch.save(autoencoder.state_dict(), str(self._filepath), **self._save_args)

    def _exists(self) -> bool:
        return Path(self._filepath.as_posix()).exists()

    def _describe(self) -> Dict[str, Any]:
        return dict(
            filepath=self._filepath,
            load_args=self._load_args,
            save_args=self._save_args,
        )
class GCNDataSet(AbstractDataSet):
    def __init__(
        self,
        filepath: str,
        load_args: Optional[Dict[str, Any]] = None,
        save_args: Optional[Dict[str, Any]] = None,
    ) -> None:
        self._filepath = PurePosixPath(filepath)
        self._load_args = load_args if load_args else {}
        self._save_args = save_args if save_args else {}

    def _load(self) -> Model:
        state_dict = torch.load(str(self._filepath))
        in_features, hidden_features, out_features = (
            state_dict["in_features.weight"].shape[1],
            state_dict["hidden_features.weight"].shape[1],
            state_dict["out_features.weight"].shape[1],
        )
        model = Model(in_features, hidden_features, out_features)
        model.load_state_dict(state_dict)
        return model

    def _save(self, model: Model) -> None:
        torch.save(model.state_dict(), str(self._filepath), **self._save_args)

    def _exists(self) -> bool:
        return Path(self._filepath.as_posix()).exists()

    def _describe(self) -> Dict[str, Any]:
        return dict(
            filepath=self._filepath,
            load_args=self._load_args,
            save_args=self._save_args,
        )
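# Both datasets above plug into Kedro's AbstractDataSet interface, whose public save()/load()
# delegate to _save()/_load(). A hedged usage sketch; the trained `model` instance and the
# file path are assumptions from the surrounding project.
gcn_ds = GCNDataSet(filepath="data/06_models/gcn.pt")
gcn_ds.save(model)        # writes model.state_dict() via torch.save
reloaded = gcn_ds.load()  # re-infers layer sizes from the stored state_dict and rebuilds Model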
def __init__(
    self,
    *,
    path: PurePosixPath,
    location: Point5D = Point5D.zero(),
    filesystem: JsonableFilesystem,
    tile_shape: Optional[Shape5D] = None,
    spatial_resolution: Optional[Tuple[int, int, int]] = None,
):
    raw_data: "np.ndarray[Any, Any]" = skimage.io.imread(filesystem.openbin(path.as_posix()))  # type: ignore
    c_axiskeys_on_disk = "yxc"[: len(raw_data.shape)]
    self._data = Array5D(raw_data, axiskeys=c_axiskeys_on_disk, location=location)
    if tile_shape is None:
        tile_shape = Shape5D.hypercube(256).to_interval5d().clamped(self._data.shape).shape
    super().__init__(
        c_axiskeys_on_disk=c_axiskeys_on_disk,
        filesystem=filesystem,
        path=path,
        dtype=self._data.dtype,
        interval=self._data.interval,
        tile_shape=tile_shape,
        spatial_resolution=spatial_resolution,
    )
def generate_lorenz(self, beta, rho, sigma, dt, size):
    """
    Generates a Lorenz-modelled time series and caches it to disk.

    :param beta: Lorenz system parameter beta
    :param rho: Lorenz system parameter rho
    :param sigma: Lorenz system parameter sigma
    :param dt: integration time step
    :param size: number of Runge-Kutta iterations (series length)
    :return: None; the file path is stored in ``self.path_to_data``
    """
    file_name = "lorenz_{rho}_{sigma}_{dt}_{size}.npy".format(rho=rho, sigma=sigma, dt=str(dt), size=size)
    path_to_file = PurePosixPath(self.PATH_TO_TS) / file_name
    self.path_to_data = path_to_file
    if path.exists(path_to_file):
        print("-> Time series with parameters rho={rho}, sigma={sigma}, dt={dt}, size={size} already exists"
              .format(rho=rho, sigma=sigma, dt=str(dt), size=size))
    else:
        print("-> Creating time series with parameters rho={rho}, sigma={sigma}, dt={dt}, size={size}"
              .format(rho=rho, sigma=sigma, dt=str(dt), size=size))
        rk = RungeKutta(beta=beta, rho=rho, sigma=sigma, dt=dt)
        ts = rk.get_series(n_iterations=size)
        with open(path_to_file.as_posix(), "wb") as f:
            np.save(file=f, arr=ts)
def __init__(self, *, outer_path: Path, inner_path: PurePosixPath, location: Point5D = Point5D.zero(),
             filesystem: JsonableFilesystem):
    self.outer_path = outer_path
    self.inner_path = inner_path
    self.filesystem = filesystem
    binfile = filesystem.openbin(outer_path.as_posix())
    f = h5py.File(binfile, "r")
    try:
        dataset = f[inner_path.as_posix()]
        if not isinstance(dataset, h5py.Dataset):
            raise ValueError(f"{inner_path} is not a Dataset")
        axiskeys = self.getAxisKeys(dataset)
        self._dataset = cast(h5py.Dataset, dataset)
        tile_shape = Shape5D.create(raw_shape=self._dataset.chunks or self._dataset.shape, axiskeys=axiskeys)
        super().__init__(
            tile_shape=tile_shape,
            interval=Shape5D.create(raw_shape=self._dataset.shape, axiskeys=axiskeys).to_interval5d(location),
            dtype=self._dataset.dtype,
            axiskeys=axiskeys,
        )
    except Exception as e:
        f.close()
        raise e
def init():
    from datetime import datetime
    from flask import Flask
    from flask_bootstrap import Bootstrap
    from flask_nav import Nav, register_renderer
    from pathlib import PurePosixPath

    from .bootstrap import top_nav, CustomBootstrapRenderer
    from .config import PORTAL_NON_ROOT, SECRET_KEY, DEBUG, LAB_NAME, MAX_UPLOAD_SIZE, YANDEX_METRIKA
    from .views import view_bp

    app = Flask(__name__)
    app.config['DEBUG'] = DEBUG
    app.config['SECRET_KEY'] = SECRET_KEY
    app.config['BOOTSTRAP_SERVE_LOCAL'] = DEBUG
    app.config['ERROR_404_HELP'] = False
    app.config['MAX_CONTENT_LENGTH'] = MAX_UPLOAD_SIZE
    app.jinja_env.globals.update(year=datetime.utcnow, laboratory=LAB_NAME, yandex=YANDEX_METRIKA)

    register_renderer(app, 'myrenderer', CustomBootstrapRenderer)
    nav = Nav(app)
    nav.register_element('top_nav', top_nav)
    Bootstrap(app)

    app_url = PurePosixPath('/') / (PORTAL_NON_ROOT or '')
    app.register_blueprint(view_bp, url_prefix=app_url.as_posix() if PORTAL_NON_ROOT else None)
    return app
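# The `PurePosixPath('/') / (PORTAL_NON_ROOT or '')` idiom above (also used in init_app() below)
# builds a clean URL prefix whether or not the portal is mounted under a sub-path.
# A quick illustration of how the joins behave; the 'portal' segment is an example value.
from pathlib import PurePosixPath

(PurePosixPath('/') / '').as_posix()                         # '/'  (portal served from the root)
(PurePosixPath('/') / 'portal').as_posix()                   # '/portal'
(PurePosixPath('/') / 'portal' / 'api' / 'jobs').as_posix()  # '/portal/api/jobs'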
def create(
    cls,
    *,
    outer_path: Path,
    inner_path: PurePosixPath,
    filesystem: JsonableFilesystem,
    attributes: N5DatasetAttributes,
) -> "N5DatasetSink":
    full_path = outer_path.joinpath(inner_path.as_posix().lstrip("/"))
    filesystem.makedirs(full_path.as_posix(), recreate=True)
    with filesystem.openbin(outer_path.joinpath("attributes.json").as_posix(), "w") as f:
        f.write(json.dumps({"n5": "2.0.0"}).encode("utf8"))
    with filesystem.openbin(full_path.joinpath("attributes.json").as_posix(), "w") as f:
        f.write(json.dumps(attributes.to_json_data()).encode("utf-8"))

    # create all directories in the constructor to avoid races when processing tiles
    created_dirs: Set[Path] = set()
    for tile in attributes.interval.split(attributes.blockSize):
        dir_path = full_path / attributes.get_tile_path(tile).parent
        if dir_path and dir_path not in created_dirs:
            # print(f"Will create dir at {dir_path}")
            filesystem.makedirs(dir_path.as_posix())
            created_dirs.add(dir_path)

    return N5DatasetSink(
        path=full_path,
        filesystem=filesystem,
        attributes=attributes,
    )
def walk_launch_files(
    base: PurePosixPath,
    filter_path: Callable[[PurePosixPath], bool],
) -> Generator[PurePosixPath, Any, None]:
    for root, dirs, files in os.walk(base.as_posix()):
        for name in files:
            path = PurePosixPath(os.path.join(root, name))
            if filter_path(path):
                yield path
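# A hedged usage sketch for the generator above. The ".launch" suffix predicate and the
# directory are only examples; any Callable[[PurePosixPath], bool] works as the filter.
for launch_file in walk_launch_files(
        PurePosixPath("catkin_ws/src"),
        filter_path=lambda p: p.suffix == ".launch"):
    print(launch_file)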
def init_app():
    from .config import (PORTAL_NON_ROOT, SECRET_KEY, DEBUG, LAB_NAME, RESIZE_URL, IMAGES_PATH,
                         MAX_UPLOAD_SIZE, YANDEX_METRIKA, VK_ENABLE, JOBS_ENABLE, CGRDB_ENABLE, VIEW_ENABLE)
    from .logins import load_user

    app = Flask(__name__)
    app.config['DEBUG'] = DEBUG
    app.config['SECRET_KEY'] = SECRET_KEY
    app.config['ERROR_404_HELP'] = False

    login_manager = LoginManager()
    login_manager.init_app(app)
    login_manager.login_view = '.login'
    login_manager.user_loader(load_user)

    app_url = PurePosixPath('/') / (PORTAL_NON_ROOT or '')

    if VIEW_ENABLE:
        from flask_bootstrap import Bootstrap
        from flask_misaka import Misaka
        from flask_nav import Nav, register_renderer
        from flask_resize import Resize
        from misaka import HTML_ESCAPE
        from .views import view_bp
        from .views.bootstrap import top_nav, CustomBootstrapRenderer, CustomMisakaRenderer

        app.config['BOOTSTRAP_SERVE_LOCAL'] = DEBUG
        app.config['RESIZE_URL'] = RESIZE_URL
        app.config['RESIZE_ROOT'] = IMAGES_PATH
        app.config['MAX_CONTENT_LENGTH'] = MAX_UPLOAD_SIZE
        app.jinja_env.globals.update(year=datetime.utcnow, laboratory=LAB_NAME, yandex=YANDEX_METRIKA)

        Resize(app)
        register_renderer(app, 'myrenderer', CustomBootstrapRenderer)
        nav = Nav(app)
        nav.register_element('top_nav', top_nav)
        Bootstrap(app)
        Misaka(app, renderer=CustomMisakaRenderer(flags=0 | HTML_ESCAPE), tables=True, autolink=True,
               underline=True, math=True, strikethrough=True, superscript=True, footnotes=True)

        app.register_blueprint(view_bp, url_prefix=app_url.as_posix() if PORTAL_NON_ROOT else None)

    if JOBS_ENABLE:
        from .API import load_jobs
        app.register_blueprint(load_jobs(), url_prefix=(app_url / 'api' / 'jobs').as_posix())

    if CGRDB_ENABLE:
        from .API import load_cgrdb
        app.register_blueprint(load_cgrdb(), url_prefix=(app_url / 'api' / 'db').as_posix())

    if VK_ENABLE:
        from .vk import vk_bp
        app.register_blueprint(vk_bp, url_prefix=(app_url / 'api' / 'vk').as_posix())

    return app
def __init__(
    self,
    *,
    datascheme: Optional[DataScheme] = None,
    protocol: Protocol,
    hostname: str,
    port: Optional[int] = None,
    path: PurePosixPath,
    search: Optional[Mapping[str, str]] = None,
    hash_: Optional[str] = None,
    search_quoting_method: SearchQuotingMethod = SearchQuotingMethod.QUOTE_PLUS,
):
    if not path.is_absolute():
        raise ValueError(f"Path '{path}' is not absolute")

    # Normalize the path: drop '.' and empty components, resolve '..'
    path_parts: List[str] = []
    for part in path.as_posix().split("/"):
        if part == "." or part == "":
            continue
        if part == "..":
            if len(path_parts) > 0:
                _ = path_parts.pop()
        else:
            path_parts.append(part)

    self.datascheme = datascheme
    self.protocol = protocol
    self.hostname = hostname
    self.host = hostname + ("" if port is None else f":{port}")
    self.port = port
    self.path = PurePosixPath("/") / "/".join(path_parts)
    self.search = search or {}
    self.hash_ = hash_

    self.schemeless_raw = f"{protocol}://{self.host}"
    self.schemeless_raw += str(path)
    if self.search:
        if search_quoting_method == SearchQuotingMethod.QUOTE_PLUS:
            quote_via = quote_plus
        else:
            quote_via = quote
        self.schemeless_raw += "?" + urlencode(self.search, doseq=True, quote_via=quote_via)
    if self.hash_:
        self.schemeless_raw += "#" + self.hash_

    if self.datascheme:
        self.raw = f"{self.datascheme}+{self.schemeless_raw}"
        self.double_protocol_raw = f"{self.datascheme}://{self.schemeless_raw}"
    else:
        self.raw = self.schemeless_raw
        self.double_protocol_raw = self.raw

    if hostname == "" and protocol not in (Protocol.FILE, Protocol.MEMORY):
        raise ValueError(f"Missing hostname in {self.raw}")

    super().__init__()
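# A standalone sketch of the normalization rule applied in the constructor above, mainly
# useful for seeing what self.path ends up as. The helper name and the example path are
# hypothetical; the collapsing logic mirrors the loop in the constructor.
from pathlib import PurePosixPath
from typing import List

def normalize_posix(path: PurePosixPath) -> PurePosixPath:
    parts: List[str] = []
    for part in path.as_posix().split("/"):
        if part in ("", "."):
            continue          # drop empty and '.' components
        if part == "..":
            if parts:
                parts.pop()   # '..' removes the previous component (never above '/')
        else:
            parts.append(part)
    return PurePosixPath("/") / "/".join(parts)

normalize_posix(PurePosixPath("/data/./raw/../processed//img.png"))  # PurePosixPath('/data/processed/img.png')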
def __init__(self, *, filesystem: JsonableFilesystem, outer_path: PurePosixPath, inner_path: PurePosixPath,
             attributes: N5DatasetAttributes):
    super().__init__(
        dtype=attributes.dataType,
        tile_shape=attributes.blockSize,
        interval=attributes.dimensions.to_interval5d(),
    )
    self.outer_path = outer_path
    self.inner_path = inner_path
    self.full_path = outer_path.joinpath(inner_path.as_posix().lstrip("/"))
    self.attributes = attributes
    self.filesystem = filesystem
def __init__(self, *, outer_path: PurePosixPath, inner_path: PurePosixPath, location: Point5D = Point5D.zero(),
             filesystem: JsonableFilesystem, spatial_resolution: Optional[Tuple[int, int, int]] = None):
    self.outer_path = outer_path
    self.inner_path = inner_path
    self.filesystem = filesystem
    binfile = filesystem.openbin(outer_path.as_posix())
    # FIXME: h5py might not like this if the filesystem isn't OSFS
    f = h5py.File(binfile, "r")  # type: ignore
    try:
        dataset = f[inner_path.as_posix()]
        if not isinstance(dataset, h5py.Dataset):
            raise ValueError(f"{inner_path} is not a Dataset")
        self.axiskeys = self.getAxisKeys(dataset)
        self._dataset = dataset
        tile_shape = Shape5D.create(raw_shape=self._dataset.chunks or self._dataset.shape, axiskeys=self.axiskeys)
        base_url = Url.parse(filesystem.geturl(outer_path.as_posix()))
        assert base_url is not None
        super().__init__(
            c_axiskeys_on_disk=self.axiskeys,
            tile_shape=tile_shape,
            interval=Shape5D.create(raw_shape=self._dataset.shape, axiskeys=self.axiskeys).to_interval5d(location),
            dtype=self._dataset.dtype,
            spatial_resolution=spatial_resolution or (1, 1, 1),  # FIXME
            filesystem=filesystem,
            path=self.outer_path,
        )
    except Exception as e:
        f.close()
        raise e
def __init__(
    self,
    key: PurePosixPath,
    size: Tuple[int, int, int],
    resolution: Tuple[int, int, int],
    voxel_offset: Optional[Tuple[int, int, int]],
    chunk_sizes: Tuple[Tuple[int, int, int], ...],
    encoding: PrecomputedChunksEncoder,
) -> None:
    self.key = PurePosixPath(key.as_posix().lstrip("/"))
    self.size = size
    self.resolution = resolution
    self.voxel_offset = (0, 0, 0) if voxel_offset is None else voxel_offset
    self.chunk_sizes = chunk_sizes
    self.encoding = encoding
    super().__init__()
class BetterCSVDataSet(AbstractDataSet):
    def __init__(self, filepath, load_args=None, save_args=None):
        self._filepath = PurePosixPath(filepath)
        self.load_args = load_args
        self.save_args = save_args

    def _load(self) -> pd.DataFrame:
        if self.load_args is None:
            self.load_args = {}
        return pd.read_csv(self._filepath, **self.load_args)

    def _save(self, df: pd.DataFrame) -> None:
        if self.save_args is None:
            self.save_args = {}
        df.to_csv(str(self._filepath), **self.save_args)

    def _exists(self) -> bool:
        return Path(self._filepath.as_posix()).exists()

    def _describe(self):
        return self.__dict__
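# A hedged usage sketch for the dataset above. save()/load() are the public AbstractDataSet
# entry points that delegate to _save()/_load(); the file path and columns are examples.
import pandas as pd

ds = BetterCSVDataSet("data/01_raw/iris.csv", save_args={"index": False})
ds.save(pd.DataFrame({"sepal_length": [5.1, 4.9], "species": ["setosa", "setosa"]}))
df = ds.load()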
class BZ2TextDataSet(AbstractDataSet):
    def __init__(self, filepath, load_args=None, save_args=None):
        self._filepath = PurePosixPath(filepath)
        self.load_args = load_args
        self.save_args = save_args

    def _load(self) -> List[bytes]:
        # bz2.open defaults to binary mode, so readlines() yields a list of bytes
        if self.load_args is None:
            self.load_args = {}
        with bz2.open(self._filepath, **self.load_args) as f:
            return f.readlines()

    def _save(self, obj):
        with open(self._filepath, "wb") as f:
            f.write(obj)

    def _exists(self) -> bool:
        return Path(self._filepath.as_posix()).exists()

    def _describe(self):
        return self.__dict__
def create_n5(
    array: Array5D,
    *,
    axiskeys: Optional[str] = None,
    chunk_size: Shape5D,
    compression: N5Compressor = RawCompressor(),
):
    path = PurePosixPath(tempfile.mkstemp()[1] + ".n5")
    sink = N5DatasetSink(
        outer_path=path,
        inner_path=PurePosixPath("/data"),
        filesystem=OsFs("/"),
        attributes=N5DatasetAttributes(
            dimensions=array.shape,
            blockSize=chunk_size,
            c_axiskeys=axiskeys or array.axiskeys,
            dataType=array.dtype,
            compression=compression,
        ),
    )
    sink_writer = sink.create()
    assert not isinstance(sink_writer, Exception)
    for tile in array.split(chunk_size):
        sink_writer.write(tile)
    return path.as_posix()
class AppendableCSVDataSet(AbstractDataSet):
    """``AppendableCSVDataSet`` loads/saves data from/to a local CSV file
    opened in append mode. It uses pandas to handle the CSV file.

    Example:
    ::

        >>> from kedro.extras.datasets.pandas import AppendableCSVDataSet
        >>> from kedro.extras.datasets.pandas import CSVDataSet
        >>> import pandas as pd
        >>>
        >>> data_1 = pd.DataFrame({'col1': [1, 2], 'col2': [4, 5],
        >>>                        'col3': [5, 6]})
        >>>
        >>> data_2 = pd.DataFrame({'col1': [7, 8], 'col2': [5, 7]})
        >>>
        >>> regular_ds = CSVDataSet(filepath="/tmp/test.csv")
        >>> appendable_ds = AppendableCSVDataSet(
        >>>     filepath="/tmp/test.csv",
        >>> )
        >>>
        >>> regular_ds.save(data_1)
        >>> appendable_ds.save(data_2)
        >>> reloaded = appendable_ds.load()
        >>> assert data_2.equals(reloaded)
    """

    DEFAULT_LOAD_ARGS = {}  # type: Dict[str, Any]
    DEFAULT_SAVE_ARGS = {"index": False}

    def __init__(
        self,
        filepath: str,
        load_args: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
    ) -> None:
        """Creates a new instance of ``AppendableCSVDataSet`` pointing to an existing
        local CSV file to be opened in append mode.

        Args:
            filepath: Filepath in POSIX format to an existing local CSV file.
            load_args: Pandas options for loading CSV files.
                Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html
            save_args: Pandas options for saving CSV files.
                Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html
                All defaults are preserved, but "index", which is set to False.
        """
        self._filepath = PurePosixPath(filepath)

        # Handle default load and save arguments
        self._load_args = deepcopy(self.DEFAULT_LOAD_ARGS)
        if load_args is not None:
            self._load_args.update(load_args)
        self._save_args = deepcopy(self.DEFAULT_SAVE_ARGS)
        if save_args is not None:
            self._save_args.update(save_args)

        # Use only append mode
        self._save_args["mode"] = "a"

    def _describe(self) -> Dict[str, Any]:
        return dict(filepath=self._filepath, load_args=self._load_args, save_args=self._save_args)

    def _load(self) -> pd.DataFrame:
        return pd.read_csv(str(self._filepath), **self._load_args)

    def _save(self, data: pd.DataFrame) -> None:
        # pylint: disable=abstract-class-instantiated
        try:
            if self._exists():
                self._save_args["header"] = False
            data.to_csv(str(self._filepath), **self._save_args)
        except FileNotFoundError:
            raise DataSetError(f"`{self._filepath}` CSV file not found. "
                               f"The file cannot be opened in "
                               f"append mode.")

    def _exists(self) -> bool:
        return Path(self._filepath.as_posix()).is_file()
def simulate_fragmentation(rbc_path: Path, mismatches: int, seed_size: int, subseed_size: int, mode: str,
                           timeout: Optional[float] = None, cutoff: bool = True,
                           threads: int = 0) -> Tuple[float, int]:
    user_id = uuid.uuid4()
    host_seed = secrets.token_bytes(seed_size)
    client_seed = corrupt_key(host_seed, mismatches)
    host_subseeds = [
        bytes(host_seed[index:index + subseed_size])
        for index in range(0, len(host_seed), subseed_size)
    ]
    client_subseeds = [
        bytes(client_seed[index:index + subseed_size])
        for index in range(0, len(client_seed), subseed_size)
    ]

    args = []
    for host_subseed, client_subseed in zip(host_subseeds, client_subseeds):
        nonce = secrets.token_bytes(len(host_seed) - subseed_size)
        host_subkey = host_subseed + nonce
        client_subkey = client_subseed + nonce
        subargs = [host_subkey.hex()]
        if mode == "aes":
            aes = AES.new(client_subkey, AES.MODE_ECB)
            subargs += [aes.encrypt(user_id.bytes).hex(), str(user_id)]
        elif mode == "chacha20":
            chacha20_nonce = secrets.token_bytes(CHACHA20_NONCE_SIZE)
            chacha20 = ChaCha20.new(key=client_subkey, nonce=chacha20_nonce)
            client_key = chacha20.encrypt(user_id.bytes)
            iv = chacha20.nonce
            iv = bytes(CHACHA20_OPENSSL_NONCE_SIZE - len(iv)) + iv
            subargs += [client_key.hex(), user_id.hex, iv.hex()]
        elif mode == "ecc":
            client_priv_key = ECC.construct(curve=EC_CURVE, d=int.from_bytes(client_subkey, "big"))
            subargs += [get_ec_public_key_bytes(client_priv_key, compress=False).hex()]
        elif mode in hash_modes:
            h = hashlib.new(mode.replace('-', '_'), client_subkey)
            subargs += [h.hexdigest()]
        elif mode == "kang12":
            subargs += [KangarooTwelve(client_subkey, b'', KANG12_SIZE).hex()]
        else:
            print(f"Error: Mode '{mode}' is not recognized.", file=sys.stderr)
            sys.exit(1)
        args.append(subargs)

    duration = 0
    key_count = 0
    for subargs in args:
        # Call rbc_validator over wsl with the UUID, the full server subkey, the client cipher, and only
        # the necessary subkey size set
        # Only extract the stderr output in verbose mode to get the actual time taken searching in text
        # mode, and make sure to check if the return code was zero or not
        env_args = []
        rbc_path_abs = rbc_path.resolve()
        if sys.platform == "win32":
            env_args.append("wsl")
            rbc_path_abs = PurePosixPath("/mnt", rbc_path_abs.drive[:-1].lower(), *rbc_path_abs.parts[1:])
        env_args += [
            rbc_path_abs.as_posix(), f"--mode={mode}", f"--subkey={subseed_size * 8}",
            f"--threads={threads}", "-v", "-c"
        ]
        if not cutoff:
            env_args.append("-a")
        env_args += subargs
        try:
            validator_proc = subprocess.run(
                env_args,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                universal_newlines=True,
                check=True,
                timeout=None if timeout is None else timeout - duration)
        except subprocess.TimeoutExpired:
            return float("inf"), key_count
        lines = [line for line in validator_proc.stderr.split("\n")]
        # Get the first line such that "Clock" is contained within it.
        clock_line = next(line for line in lines if re.search(r"Clock", line))
        count_line = next(line for line in lines if re.search(r"searched", line))
        # Get only the decimal output (in seconds) and increment duration by its value
        duration += float(clock_line.split(" ")[3])
        key_count += int(count_line.split(" ")[3])

    return duration, key_count
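# The Windows branch above rewrites the binary's path into WSL's /mnt/<drive>/... layout before
# prefixing the command with `wsl`. A small standalone sketch of that translation; the helper
# name and the example path are hypothetical.
from pathlib import Path, PurePosixPath

def to_wsl_path(win_path: Path) -> PurePosixPath:
    win_path = win_path.resolve()
    # 'C:' -> 'c'; parts[1:] drops the 'C:\\' anchor so only the directory components remain
    return PurePosixPath("/mnt", win_path.drive[:-1].lower(), *win_path.parts[1:])

# On Windows: to_wsl_path(Path(r"C:\Users\me\rbc_validator.exe"))
#   -> PurePosixPath('/mnt/c/Users/me/rbc_validator.exe')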
class AppendableExcelDataSet(AbstractDataSet):
    """``AppendableExcelDataSet`` loads/saves data from/to a local Excel file
    opened in append mode. It uses pandas to handle the Excel file.

    Example:
    ::

        >>> from kedro.extras.datasets.pandas import AppendableExcelDataSet
        >>> from kedro.extras.datasets.pandas import ExcelDataSet
        >>> import pandas as pd
        >>>
        >>> data_1 = pd.DataFrame({'col1': [1, 2], 'col2': [4, 5],
        >>>                        'col3': [5, 6]})
        >>>
        >>> data_2 = pd.DataFrame({'col1': [7, 8], 'col2': [5, 7]})
        >>>
        >>> regular_ds = ExcelDataSet(filepath="/tmp/test.xlsx")
        >>> appendable_ds = AppendableExcelDataSet(
        >>>     filepath="/tmp/test.xlsx",
        >>>     save_args={"sheet_name": "my_sheet"},
        >>>     load_args={"sheet_name": "my_sheet"}
        >>> )
        >>>
        >>> regular_ds.save(data_1)
        >>> appendable_ds.save(data_2)
        >>> reloaded = appendable_ds.load()
        >>> assert data_2.equals(reloaded)
    """

    DEFAULT_LOAD_ARGS = {"engine": "openpyxl"}
    DEFAULT_SAVE_ARGS = {"index": False}

    def __init__(
        self,
        filepath: str,
        load_args: Dict[str, Any] = None,
        save_args: Dict[str, Any] = None,
    ) -> None:
        """Creates a new instance of ``AppendableExcelDataSet`` pointing to an existing
        local Excel file to be opened in append mode.

        Args:
            filepath: Filepath in POSIX format to an existing local Excel file.
            load_args: Pandas options for loading Excel files.
                Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html
                All defaults are preserved, but "engine", which is set to "openpyxl".
            save_args: Pandas options for saving Excel files.
                Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_excel.html
                All defaults are preserved, but "index", which is set to False.
                If you would like to specify options for the `ExcelWriter`, you can include them
                under "writer" key. Here you can find all available arguments:
                https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.ExcelWriter.html
                Note: `mode` option of `ExcelWriter` is set to `a` and it can not be overridden.
        """
        self._filepath = PurePosixPath(filepath)

        # Handle default load and save arguments
        self._load_args = deepcopy(self.DEFAULT_LOAD_ARGS)
        if load_args is not None:
            self._load_args.update(load_args)

        save_args = deepcopy(save_args) or {}
        self._save_args = deepcopy(self.DEFAULT_SAVE_ARGS)
        self._writer_args = save_args.pop("writer", {})  # type: Dict[str, Any]
        self._writer_args.setdefault("engine", "openpyxl")
        if save_args is not None:
            self._save_args.update(save_args)

        # Use only append mode
        self._writer_args["mode"] = "a"

    def _describe(self) -> Dict[str, Any]:
        return dict(
            filepath=self._filepath,
            load_args=self._load_args,
            save_args=self._save_args,
            writer_args=self._writer_args,
        )

    def _load(self) -> pd.DataFrame:
        return pd.read_excel(str(self._filepath), **self._load_args)

    def _save(self, data: pd.DataFrame) -> None:
        # pylint: disable=abstract-class-instantiated
        try:
            with pd.ExcelWriter(str(self._filepath), **self._writer_args) as writer:
                data.to_excel(writer, **self._save_args)
        except FileNotFoundError as exc:
            raise DataSetError(
                f"`{self._filepath}` Excel file not found. The file cannot be opened in "
                f"append mode.") from exc

    def _exists(self) -> bool:
        return Path(self._filepath.as_posix()).is_file()
class DataSaver:
    """
    Class that implements methods to save models to volumes and upload them to a database.
    """

    def __init__(self, local_path, base_name, template, **kwargs):
        """
        :param local_path: local (mounted) directory the model is written to
        :param base_name: base name of the model
        :param template: array-like of template values appended to the model name
        :param kwargs: extra hyperparameters appended to the model name
        """
        print("DEBUG", kwargs)
        print(template.astype(str))
        self.LOCAL_PATH = PurePosixPath(local_path)
        self.model_name = base_name + "_" + "_".join(template.astype(str))
        self.model_name = self.model_name + "_" + "_".join(list(map(str, kwargs.values())))
        self.local_path_to_model = self.LOCAL_PATH / self.model_name
        # TODO: Hash functions for model's names to distinguish them

    def _check_paths(self):
        """
        Checks the local path and tries to create it if it doesn't exist.
        Raises an exception if creation fails.

        :return: None
        """
        if path.exists(self.LOCAL_PATH):
            return None
        else:
            print("-> Creating local mounted path inside the container. Pay attention!")
            try:
                makedirs(self.LOCAL_PATH.as_posix())
            except Exception:
                raise Exception("Could not create mounted path")

    def _checks_connections(self):
        """
        Checks connections (not implemented yet).
        :return:
        """
        return None

    def save_to_volume(self, obj):
        """
        Saves the object to local disk / the docker volume.
        :return:
        """
        self._check_paths()
        path_to_model = self.LOCAL_PATH / self.model_name
        with open(path_to_model, "wb") as f:
            dump(obj, f)

    def _load_to_database(self):
        """
        Loads data to database
        :return:
        """
        pass
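# A hedged sketch of how the model name is composed by the constructor above, assuming
# `template` is a numpy array (as the astype(str) call suggests); all values are illustrative.
import numpy as np

saver = DataSaver("/models", "autoencoder", np.array([64, 32, 16]), lr=0.001, epochs=50)
print(saver.model_name)            # autoencoder_64_32_16_0.001_50
print(saver.local_path_to_model)   # /models/autoencoder_64_32_16_0.001_50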
def do_save_project(filesystem: BucketFs, file_path: PurePosixPath, workflow_contents: bytes):
    with filesystem.openbin(file_path.as_posix(), "w") as f:
        f.write(workflow_contents)
def absolute(self, path: PurePosixPath):
    path = '/' / path
    path = (self._basedir / path.as_posix()[1:])  # type: Path
    # relative_to() is used as a containment check: it raises ValueError
    # if the joined path is not lexically under self._basedir
    path.relative_to(self._basedir)
    return path
def save_project(self, fs: JsonableFilesystem, path: PurePosixPath) -> int:
    with fs.openbin(path.as_posix(), "w") as f:
        return f.write(self.get_ilp_contents())
def do_load_project_bytes(filesystem: BucketFs, file_path: PurePosixPath) -> bytes:
    with filesystem.openbin(file_path.as_posix(), "r") as f:
        return f.read()
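# A hedged round-trip sketch for the two project helpers above, given a BucketFs instance `fs`;
# the project path and contents are examples.
contents = b"...serialized .ilp workflow..."
do_save_project(fs, PurePosixPath("projects/MyProject.ilp"), contents)
assert do_load_project_bytes(fs, PurePosixPath("projects/MyProject.ilp")) == contents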
class ClusterRun:
    def __init__(self, array_run_obj, anat_file_path, physio_file_path, suffix=""):
        try:
            self.cluster_workspace = PurePosixPath(
                parameter_finder(array_run_obj.anatomy_df, 'cluster_workspace'))
        except NameError:
            raise ParameterNotFoundError(
                "cluster_workspace is not defined for running CxSystem on cluster")
        assert self.cluster_workspace.is_absolute(), \
            "cluster_workspace {} must be an absolute path with explicit [remote] home directory path".format(
                self.cluster_workspace.as_posix())

        try:
            self.cluster_address = parameter_finder(array_run_obj.anatomy_df, 'cluster_address')
        except NameError:
            raise ParameterNotFoundError(
                "cluster_address is not defined for running CxSystem on cluster")

        try:
            self.cluster_login_node = parameter_finder(array_run_obj.anatomy_df, 'cluster_login_node')
        except NameError:
            print(" - No cluster login node found. Directly connecting to cluster address {}".format(
                self.cluster_address))
            self.cluster_login_node = '--'

        # # the following call will check if the cluster is available or not, but it needs root access
        # self.ping_cluster()

        try:
            self.cluster_username = parameter_finder(array_run_obj.anatomy_df, 'cluster_username')
            assert self.cluster_username != 'username', \
                "Cluster username must be changed in the configuration file, currently it is the default value 'username'"
            print(" - Logging in with user '%s'" % self.cluster_username)
        except NameError:
            self.cluster_username = input(' - Enter cluster username: ')
        try:
            self.password = parameter_finder(array_run_obj.anatomy_df, 'password')
        except NameError:
            if 'CLUSTERPASS' in os.environ.keys():
                self.password = os.environ['CLUSTERPASS']
            else:
                self.password = getpass.getpass(
                    ' - Enter password for user {}: '.format(self.cluster_username))

        self.suffix = suffix
        print(" - temp file suffix is %s" % self.suffix)

        self.client = paramiko.SSHClient()
        self.client.load_system_host_keys()
        self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        if self.cluster_login_node != '--':
            print(" - Connecting to login node {}".format(self.cluster_login_node))
            sock = paramiko.ProxyCommand("ssh {}@{} nc {} 22".format(
                self.cluster_username, self.cluster_login_node, self.cluster_address))
            sock.settimeout(30)
            self.client.connect(self.cluster_address, port=22, username=self.cluster_username,
                                password=self.password, sock=sock)
        else:
            self.client.connect(self.cluster_address, port=22, username=self.cluster_username,
                                password=self.password)
        print(" - Connected to %s" % self.cluster_address)
        print(" - Creating workspace folder if not exists")
        self.ssh_commander('mkdir -p {}'.format(self.cluster_workspace.as_posix()))
        scp = SCPClient(self.client.get_transport())

        if 'json' in anat_file_path.suffix.lower():
            converter = fileconverter.ConfigConverter(anat_file_path.as_posix())
            anat_file_path = Path(converter.save_as_csv(overwrite=True))
        if 'json' in physio_file_path.suffix.lower():
            converter = fileconverter.ConfigConverter(physio_file_path.as_posix())
            physio_file_path = Path(converter.save_as_csv(overwrite=True))

        print(" - Transferring configuration files ...")
        self.remote_anat_filename = '_tmp_anat_config{}.csv'.format(self.suffix)
        self.remote_phys_filename = '_tmp_physio_config{}.csv'.format(self.suffix)
        self.local_workspace_unexpanded = Path(parameter_finder(array_run_obj.anatomy_df, 'workspace_path'))
        self.local_workspace = Path(parameter_finder(array_run_obj.anatomy_df, 'workspace_path')).expanduser()
        self.local_cluster_folder = self.local_workspace.joinpath('cluster_run' + self.suffix)
        if not self.local_cluster_folder.is_dir():
            os.mkdir(self.local_cluster_folder.as_posix())

        try:
            imported_connections_file = Path(
                parameter_finder(array_run_obj.anatomy_df, 'import_connections_from'))
            if imported_connections_file.is_file():
                scp.put(imported_connections_file.as_posix(), self.cluster_workspace.as_posix())
                new_path = Path('./').joinpath(imported_connections_file.name).as_posix()
                change_parameter_value_in_file(
                    anat_file_path.as_posix(),
                    self.local_cluster_folder.joinpath(anat_file_path.name),
                    'import_connections_from', new_path)
                anat_file_path = self.local_cluster_folder.joinpath(anat_file_path.name)
        except TypeError:  # this is when the value is # or -- for instance
            pass

        scp.put(anat_file_path.as_posix(),
                self.cluster_workspace.joinpath(self.remote_anat_filename).as_posix())
        scp.put(physio_file_path.as_posix(),
                self.cluster_workspace.joinpath(self.remote_phys_filename).as_posix())

        # ask user to set the number of nodes, time and memory:
        print(" - Please check the default csc_puhti.job file and set the time, memory and uncomment and enter email address if you wish."
              "\nNote that the number of nodes in default slurm file should always be set to 1."
              " Instead you should enter the number of nodes in the CxSystem network config file. "
              "\nAlso the default number of CPUs=16 does not need to be changed most of the times. ")
        self.slurm_file_path = Path(
            parameter_finder(array_run_obj.anatomy_df, 'cluster_job_file_path')).expanduser()
        if not self.slurm_file_path.is_file():
            if not self.slurm_file_path.is_absolute():
                raise RelativePathError(
                    "\nSlurm file {} not found in local workspace. Make sure the path to the file is "
                    "absolute".format(self.slurm_file_path.as_posix()))
            else:
                raise FileNotFoundError("\nSlurm file {} not found".format(self.slurm_file_path.as_posix()))

        # updating remote cxsystem2
        self.update_remote_cxsystem2(self.slurm_file_path, self.cluster_workspace)

        # building slurm :
        for item_idx, item in enumerate(array_run_obj.clipping_indices):
            with open(self.slurm_file_path.as_posix(), 'r') as sl1:
                remote_slurm_filename = "_tmp_slurm{}_part{}.job".format(self.suffix, item_idx)
                with open(self.local_cluster_folder.joinpath(remote_slurm_filename).as_posix(), 'w') as sl2:  # wb -> w
                    for line in sl1:
                        sl2.write(line)
                    try:
                        sl2.write(
                            "python -c "
                            "\"from cxsystem2.core.cxsystem import CxSystem as cxs; "
                            " cx = cxs('{anatomy}','{physio}', cluster_run_start_idx={cluster_start},cluster_run_step={cluster_step}); "
                            "cx.run()\"".format(
                                anatomy=self.remote_anat_filename,
                                physio=self.remote_phys_filename,
                                cluster_start=item,
                                cluster_step=array_run_obj.clipping_indices[item_idx + 1] -
                                array_run_obj.clipping_indices[item_idx]))
                    except IndexError:
                        sl2.write(
                            "python -c "
                            "\"from cxsystem2.core.cxsystem import CxSystem as cxs; "
                            " cx = cxs('{anatomy}','{physio}', cluster_run_start_idx={cluster_start},cluster_run_step={cluster_step}); "
                            "cx.run()\"".format(
                                anatomy=self.remote_anat_filename,
                                physio=self.remote_phys_filename,
                                cluster_start=item,
                                cluster_step=array_run_obj.total_configs -
                                array_run_obj.clipping_indices[item_idx]))
            scp.put(self.local_cluster_folder.joinpath(remote_slurm_filename).as_posix(),
                    self.cluster_workspace.joinpath(remote_slurm_filename).as_posix())
        print(" - Slurm file generated and copied to cluster")

        self.channel = self.client.invoke_shell()
        for item_idx, item in enumerate(array_run_obj.clipping_indices):
            remote_slurm_filename = "_tmp_slurm{}_part{}.job".format(self.suffix, item_idx)
            if platform == 'win32':
                print(" - Converting the file using dos2unix")
                self.channel.send('cd {} && dos2unix {}\n'.format(
                    self.cluster_workspace.as_posix(), remote_slurm_filename))
                time.sleep(1)
            self.channel.send('cd {} && sbatch {}\n'.format(
                self.cluster_workspace.as_posix(), remote_slurm_filename))
            print(" - Job file {} submitted".format(remote_slurm_filename))
            time.sleep(1)

        cluster_metadata = {
            'cluster_address': self.cluster_address,
            'cluster_login_node': self.cluster_login_node,
            'cluster_username': self.cluster_username,
            'local_workspace_unexpanded': self.local_workspace_unexpanded.as_posix(),
            'local_workspace': self.local_workspace.as_posix(),
            'local_cluster_run_folder': self.local_cluster_folder.as_posix(),
            'local_cluster_run_download_folder': self.local_cluster_folder.joinpath('downloads'),
            'cluster_workspace': self.cluster_workspace.as_posix(),
            'cluster_simulation_folder': self.cluster_workspace.joinpath(
                parameter_finder(array_run_obj.anatomy_df, 'simulation_title')).as_posix(),
            'suffix': self.suffix,
            'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-7],
        }
        with open(self.local_cluster_folder.joinpath('cluster_metadata{}.pkl'.format(self.suffix)), 'wb') as ff:
            pickle.dump(cluster_metadata, ff)
        print(" - Cluster metadata saved. To download the result and clean the environments after getting the email,"
              " run the following command in the terminal:\n")
        print("cxcluster " + self.local_cluster_folder.joinpath(
            'cluster_metadata{}.pkl'.format(self.suffix)).as_posix())

    def ping_cluster(self):
        try:
            # check if the cluster address is ip or hostname
            socket.inet_aton(self.cluster_address)
            cluster_ip = self.cluster_address
        except OSError:
            cluster_ip = socket.gethostbyname(self.cluster_address)
        p = ping(cluster_ip, timeout=3)
        if not p:
            raise ClusterNotReachableError("Cluster node is not reachable")

    def ssh_commander(self, command, print_flag=False):
        stdin, stdout, stderr = self.client.exec_command(command, get_pty=True)
        out = stdout.read(),
        if print_flag is True:
            print(out[0])
        return out[0]

    def update_remote_cxsystem2(self, slurm_path, remote_workspace):
        slurm_path = Path(slurm_path)
        remote_workspace = Path(remote_workspace)
        module_name = self.find_remote_python_module(slurm_path)
        self.ssh_commander('mkdir -p {}'.format(self.cluster_workspace.as_posix()))

        # Query for valid cxsystem, install/update if necessary, report
        print(" - Checking CxSystem2 on cluster")
        # Should be empty string for existing git repo
        git_repo_error_message = self.ssh_commander(
            'source ~/.bash_profile ; '
            'source ~/.bashrc ; '
            'cd {workspace} ; '
            'cd CxSystem2 ; '
            'git -C . rev-parse'.format(workspace=remote_workspace.as_posix())).decode('utf-8')
        if not git_repo_error_message:
            git_basename = self.ssh_commander(
                'cd {workspace}/CxSystem2 ; '
                'git rev-parse --show-toplevel'.format(workspace=remote_workspace.as_posix())).decode('utf-8')
            commit_HEAD_hash = self.ssh_commander(
                'source ~/.bash_profile ; '
                'source ~/.bashrc ; '
                'cd {workspace} ; '
                'cd CxSystem2 ; '
                'git rev-parse --short HEAD'.format(workspace=remote_workspace.as_posix())).decode('utf-8')
            git_branch = self.ssh_commander(
                'source ~/.bash_profile ; '
                'source ~/.bashrc ; '
                'cd {workspace} ; '
                'cd CxSystem2 ; '
                'git rev-parse --abbrev-ref HEAD'.format(workspace=remote_workspace.as_posix())).decode('utf-8')
            print(f" - The git repo is {git_basename} branch is {git_branch} commit HEAD hash is {commit_HEAD_hash}")
            print(f" - No need to download/install")
        else:
            print(" - Updating CxSystem2 on cluster")
            print(self.ssh_commander(
                'source ~/.bash_profile ; '
                'source ~/.bashrc ; '
                'cd {workspace} ; '
                'git clone https://github.com/VisualNeuroscience-UH/CxSystem2 ; '
                'cd CxSystem2 ; '
                'git pull ; '.format(workspace=remote_workspace.as_posix())).decode('utf-8'))
            print(self.ssh_commander(
                'bash -lc \''
                'source ~/.bash_profile ; '
                'source ~/.bashrc ; '
                'echo $PATH; '
                'module load {module} ;'
                'cd {cxfolder} ; '
                'python -m pip install -Ue . --user\''.format(
                    module=module_name,
                    cxfolder=remote_workspace.joinpath('CxSystem2').as_posix())).decode('utf-8'))

    def find_remote_python_module(self, slurm_path):
        module_name = ''
        slurm_path = Path(slurm_path)
        with open(slurm_path.as_posix()) as f:
            for line in f:
                if 'module load' in line.lower() and 'python' in line.lower():
                    module_name = line.split(' ')[-1].strip('\n')
                    print(" - Remote module name is {}".format(module_name))
        return module_name
class CmdRequestHandler(BaseRequestHandler):
    "Handle a FTP session on Command port"

    def __init__(self, request, client_address, server, filesystem):
        self._fs = filesystem
        self._cwd = PurePosixPath('/')
        BaseRequestHandler.__init__(self, request, client_address, server)

    def handle(self):
        self.request.settimeout(30)
        try:
            self.reply(220)  # Service ready for new user.
            while True:
                cmd = self.request.recv(8192).decode().strip().split(maxsplit=1)  # type: List[str]
                if not cmd:
                    break
                print(cmd)
                try:
                    handler = self.__getattribute__('handle_' + cmd[0].upper())
                except AttributeError:
                    self.handle_unknown(*cmd)
                else:
                    handler(*cmd[1:])
                # else:
                #     print(handler.__code__.co_argcount)
                #     self.reply(501)
        except socket.timeout:  # pylint: disable=E1101
            pass
        finally:
            print('closed\r\n')

    def handle_unknown(self, *args):
        if args[0] in ('ACCT', 'ALLO', 'SITE'):
            self.reply(202)  # Command not implemented, superfluous at this site.
        else:
            self.reply(502)  # Command not implemented.

    def handle_USER(self, user=None):  # Minimum implementation
        # self.reply(331)  # User name okay, need password.
        self.reply(230)  # User logged in, proceed.

    # def handle_PASS(self, pwd=None):
    #     self.reply(230)  # User logged in, proceed.

    def handle_PWD(self):
        self.reply(257, '"%s"' % self._cwd.as_posix())  # "PATHNAME" created.

    def handle_CWD(self, path):
        path = self._cwd / path
        if self._fs.is_dir(path):
            self._cwd = path
            self.reply(250)  # Requested file action okay, completed.
        else:
            self.reply(550)  # Requested action not taken. File unavailable (e.g., file not found, no access).

    def handle_MKD(self, path):
        path = self._cwd / path
        try:
            self._fs.mkdir(path)
            self.reply(257, '"%s"' % path.as_posix())  # "PATHNAME" created.
        except (ValueError, FileExistsError):
            self.reply(550)

    def handle_RMD(self, path):
        path = self._cwd / path
        self.reply(250)  # Requested file action okay, completed.

    # def handle_SYST(self):
    #     self.reply(215, "UNIX ")

    # def handle_FEAT(self):
    #     self.reply(211)  # no-features

    def handle_TYPE(self, typecode):  # Minimum implementation
        self.reply(200)  # 200 Command okay.

    # def handle_MODE(self, mode):  # Minimum implementation
    #     pass

    # def handle_QUIT(self):  # Minimum implementation
    #     pass

    def handle_PORT(self, port):  # Minimum implementation
        try:
            h1, h2, h3, h4, p1, p2 = map(int, port.split(','))
            data_host = ('.'.join(map(str, (h1, h2, h3, h4))), p1 * 256 + p2)
        except ValueError:
            self.reply(501)
        else:
            if data_host[0] != self.request.getpeername()[0]:
                self.reply(501)
            else:
                self._data_host = data_host
                self.reply(200)  # 200 Command okay.

    # def handle_STRU(self, structure):  # Minimum implementation
    #     pass

    def recv_data(self):
        self.reply(150)  # File status okay; about to open data connection.
        print(self._data_host)
        with socket.create_connection(self._data_host) as conn:
            while True:
                data = conn.recv(8192)
                if not data:
                    break
                yield data
        self.reply(226)  # Closing data connection. Requested file action successful

    def send_data(self, iterator):
        self.reply(150)  # File status okay; about to open data connection.
        with socket.create_connection(self._data_host) as conn:
            for data in iterator:
                print(data)
                conn.sendall(data)
        self.reply(226)  # Closing data connection. Requested file action successful

    def handle_STOR(self, path):  # Minimum implementation
        path = self._cwd / path
        self._fs.write_bytes(path, self.recv_data())

    def handle_RETR(self, path):  # Minimum implementation
        path = self._cwd / path
        self.send_data(self._fs.read_bytes(path))

    # def handle_NOOP(self):  # Minimum implementation
    #     pass

    def handle_SIZE(self, path):
        path = self._cwd / path
        self.reply(312, '0')

    def handle_LIST(self, path='.'):
        path = self._cwd / path
        if not self._fs.is_dir(path):
            self.reply(550)
        else:
            self.send_data(((entry + '\r\n').encode() for entry in self._fs.iterdir(path)))

    def reply(self, code, *args):
        print(code, args)
        self.request.send(("%d %s\r\n" % (code, " ".join(args))).encode())
class PrecomputedChunksScale:
    def __init__(
        self,
        key: PurePosixPath,
        size: Tuple[int, int, int],
        resolution: Tuple[int, int, int],
        voxel_offset: Optional[Tuple[int, int, int]],
        chunk_sizes: Tuple[Tuple[int, int, int], ...],
        encoding: PrecomputedChunksEncoder,
    ) -> None:
        self.key = PurePosixPath(key.as_posix().lstrip("/"))
        self.size = size
        self.resolution = resolution
        self.voxel_offset = (0, 0, 0) if voxel_offset is None else voxel_offset
        self.chunk_sizes = chunk_sizes
        self.encoding = encoding
        super().__init__()

    @classmethod
    def from_datasource(
            cls, *, datasource: DataSource, key: PurePosixPath,
            encoding: PrecomputedChunksEncoder) -> "PrecomputedChunksScale":
        return PrecomputedChunksScale(
            key=key,
            chunk_sizes=tuple([(datasource.tile_shape.x, datasource.tile_shape.y, datasource.tile_shape.z)]),
            size=(datasource.shape.x, datasource.shape.y, datasource.shape.z),
            resolution=datasource.spatial_resolution,
            voxel_offset=(datasource.location.x, datasource.location.y, datasource.location.z),
            encoding=encoding)

    def to_json_value(self) -> JsonObject:
        return {
            "key": self.key.as_posix(),
            "size": self.size,
            "resolution": self.resolution,
            "voxel_offset": self.voxel_offset,
            "chunk_sizes": self.chunk_sizes,
            "encoding": self.encoding.to_json_value(),
        }

    @classmethod
    def from_json_value(cls, value: JsonValue) -> "PrecomputedChunksScale":
        value_obj = ensureJsonObject(value)
        return PrecomputedChunksScale(
            key=PurePosixPath(ensureJsonString(value_obj.get("key"))),
            size=ensureJsonIntTripplet(value_obj.get("size")),
            resolution=ensureJsonIntTripplet(value_obj.get("resolution")),
            voxel_offset=ensureOptional(ensureJsonIntTripplet, value_obj.get("voxel_offset")),
            chunk_sizes=tuple([
                ensureJsonIntTripplet(v) for v in ensureJsonArray(value_obj.get("chunk_sizes"))
            ]),
            encoding=PrecomputedChunksEncoder.from_json_value(value_obj.get("encoding")),
        )

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, PrecomputedChunksScale):
            return False
        return (self.key == other.key and self.size == other.size and self.resolution == other.resolution
                and self.voxel_offset == other.voxel_offset and self.chunk_sizes == other.chunk_sizes
                and self.encoding == other.encoding)
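# A hedged round-trip sketch for the class above. The scale values are illustrative and the
# concrete encoder (RawEncoder) is an assumption about an available PrecomputedChunksEncoder subclass.
scale = PrecomputedChunksScale(
    key=PurePosixPath("/4_4_40"),     # stored without the leading slash
    size=(1024, 1024, 128),
    resolution=(4, 4, 40),
    voxel_offset=None,                # normalized to (0, 0, 0) by the constructor
    chunk_sizes=((64, 64, 64),),
    encoding=RawEncoder(),            # assumed encoder implementation
)
assert PrecomputedChunksScale.from_json_value(scale.to_json_value()) == scale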