def copy_cookiecutter_resume(template_name='cookiecutter-easydata'):
    """Write the cookiecutter replay values into the generated project as YAML.

    By default, cookiecutter keeps replay data as JSON in a replay
    directory under the user's ~/.cookiecutter configuration. This hook
    re-serializes those values (public keys only, sorted) to a YAML file
    at the root of the generated project, which allows regenerating the
    project via:

    >>> cookiecutter --config_file path/to/cookiecutter-easydata.yaml cookiecutter-easydata
    """
    user_config = get_user_config()
    replay_dir = pathlib.Path(user_config['replay_dir'])
    src_path = replay_dir / f'{template_name}.json'
    yml_path = f'.{template_name}.yml'  # relative to root of generated project

    logger.debug(f"Reading cookiecutter replay data from {src_path}")
    with open(src_path) as json_file:
        replay = json.load(json_file)

    # Keep only the public (non-underscore) keys, in sorted order.
    context = {}
    for key, value in sorted(replay['cookiecutter'].items()):
        if not key.startswith('_'):
            context[key] = value

    writer = YAML()
    writer.default_flow_style = False
    writer.width = 4096
    writer.indent(offset=4)
    logger.debug(f"Dumping cookiecutter replay (YAML) info to {yml_path}")
    with open(yml_path, 'w') as yml_file:
        writer.dump({'default_context': context}, yml_file)
def load_settings():
    """Load settings from DEFAULT_SETTINGS_FILE, creating it when missing.

    Returns
    -------
    dict or None
        The loaded settings, with every entry of ``whitelist-files`` and
        ``blacklist-files`` normalized to start and end with ``'/'``.
        Returns ``None`` when both whitelist and blacklist are enabled
        (an invalid combination). When the settings file does not exist,
        writes ``DEFAULT_SETTINGS`` to disk and returns it.
    """
    def _wrap_slashes(entry):
        # Normalize a path fragment so it both starts and ends with '/'.
        if not entry.startswith('/'):
            entry = '/' + entry
        if not entry.endswith('/'):
            entry = entry + '/'
        return entry

    yaml = YAML()
    if os.path.exists(DEFAULT_SETTINGS_FILE):
        with open(DEFAULT_SETTINGS_FILE, 'r') as f:
            settings = dict(yaml.load(f))
        if settings['whitelist-enabled'] and settings['blacklist-enabled']:
            log('whitelist and blacklist cannot be enabled simultaneously')
            return None
        # Paths starting and ending with '/'
        settings['whitelist-files'] = [
            _wrap_slashes(w) for w in settings['whitelist-files']]
        # BUG FIX: the original wrote `settings[i] + '/'` (indexing the
        # settings dict with a loop integer) instead of the blacklist
        # entry itself, raising KeyError whenever a blacklist path was
        # missing its trailing '/'.
        settings['blacklist-files'] = [
            _wrap_slashes(b) for b in settings['blacklist-files']]
        return settings
    # No settings file yet: persist the defaults and use them.
    with open(DEFAULT_SETTINGS_FILE, 'w+') as f:
        yaml.dump(DEFAULT_SETTINGS, f)
    return DEFAULT_SETTINGS
def _save(self):
    """Save the current kube resource configuration to the local file cache
    so it can be restored later.

    The resource is fetched through the kube API, volatile server-managed
    metadata fields are stripped, and the result is written as YAML under
    ``OMC_KUBE_CACHE_DIR/<kube_instance>/<namespace>/<resource_type>/``.
    """
    resource_name = self._get_one_resource_value()
    namespace = self.client.get_namespace(
        self._get_kube_api_resource_type(), resource_name)
    kube_instance = self._get_one_resource_value("kube")
    if not kube_instance:
        # fall back to the local kube instance when none was specified
        kube_instance = 'local'
    cache_folder = os.path.join(settings.OMC_KUBE_CACHE_DIR, kube_instance,
                                namespace, self._get_kube_resource_type())
    # _preload_content=False keeps the raw HTTP response so we can decode
    # the JSON body ourselves below.
    result = self._read_namespaced_resource(resource_name, namespace,
                                            _preload_content=False)
    stream = StringIO()
    the_result = json.loads(result.data.decode('UTF-8'))
    # Drop server-managed fields so the saved copy can be re-applied later.
    ObjectUtils.delete_node(the_result, 'metadata.creationTimestamp')
    ObjectUtils.delete_node(the_result, 'metadata.resourceVersion')
    yaml = YAML()
    yaml.dump(the_result, stream)
    content = stream.getvalue()
    make_directory(cache_folder)
    with open(os.path.join(cache_folder, resource_name + '.yaml'), 'w') as f:
        f.write(content)
def copy_cookiecutter_resume(template_name='easydata'):
    """Write the cookiecutter replay values into the generated project as YAML.

    By default, cookiecutter creates a replay directory in a user's
    ~/.cookiecutter directory; Easydata additionally dumps this data into
    the generated project as JSON (via a jsonify call), but that alone
    does not let us regenerate the project automatically. This hook
    converts those values (public keys only, sorted) to a YAML file in
    the generated project so it can be regenerated with:

    >>> cookiecutter --config_file path/to/.easydata.yaml easydata
    """
    # Both paths are relative to the root of the generated project.
    src_path = f'.{template_name}.json'
    yml_path = f'.{template_name}.yml'

    logger.debug(f"Reading cookiecutter replay data from {src_path}")
    with open(src_path) as json_file:
        replay = json.load(json_file)

    # Keep only the public (non-underscore) keys, in sorted order.
    context = {}
    for key, value in sorted(replay.items()):
        if not key.startswith('_'):
            context[key] = value

    writer = YAML()
    writer.default_flow_style = False
    writer.width = 4096
    writer.indent(offset=4)
    logger.debug(f"Dumping cookiecutter replay (YAML) info to {yml_path}")
    with open(yml_path, 'w') as yml_file:
        writer.dump({'default_context': context}, yml_file)
def generate_hpsearch():
    """Generate one child config file per hyperparameter-search trial.

    Loads the parent YAML given by ``args.input_file``, draws a random
    value for every search parameter, and writes each trial out as a
    sibling YAML file named ``<stem>_<desc>_<trial>.yaml``.
    """
    yaml_io = YAML()
    parent_path = Path(args.input_file)
    parent_config = yaml_io.load(parent_path)
    search_cfg = parent_config['hpsearch']

    # Refuse to expand a config that was itself produced by this function.
    if search_cfg.get('is_child', False):
        raise RuntimeError(
            'This YAML is itself a child config generated for an hyperparameter search.'
        )
    # Only uniform random sampling is supported.
    assert search_cfg['type'] == 'random_uniform'

    description = search_cfg.get('desc', 'hpsearch')
    for trial in range(search_cfg['n_trials']):
        child_config = make_child_config(parent_config)
        for param_spec in search_cfg['params']:
            sampled = generate_random_value(param_spec)
            set_item_at_path(child_config, param_spec['param'], sampled)
        out_name = parent_path.stem + '_{}_{}.yaml'.format(description, trial)
        yaml_io.dump(child_config, parent_path.with_name(out_name))
def yaml(self):
    """Render the kube resource configuration as YAML and log it to the console."""
    resource_name = self._get_one_resource_value()
    ns = self.client.get_namespace(
        self._get_kube_api_resource_type(), resource_name)
    fetched = self._read_namespaced_resource(resource_name, ns)
    buffer = StringIO()
    dumper = YAML()
    dumper.dump(fetched, buffer)
    console.log(buffer.getvalue())
def main():
    """Sync each post's sitemap ``lastmod`` with its last git commit date.

    For every markdown file under POSTS_PATH, read the date of the most
    recent git commit touching it, update (or insert) ``sitemap.lastmod``
    in the YAML front matter, rewrite the post in place, and finally
    git-commit all updated posts.
    """
    count = 0
    yaml = YAML()
    for post in glob.glob(os.path.join(POSTS_PATH, "*.md")):
        # BUG FIX: check_output() returns bytes; without decoding, the
        # comparison against the (str) front-matter value below is always
        # False, so every post is rewritten on every run and the dumped
        # value becomes a YAML !!binary node instead of a date string.
        git_lastmod = subprocess.check_output([
            "git", "log", "-1", "--pretty=%ad", "--date=short", post
        ]).decode("utf-8").strip()
        if not git_lastmod:
            # file not tracked by git yet
            continue
        frontmatter, line_num = get_yaml(post)
        meta = yaml.load(frontmatter)
        if 'sitemap' in meta:
            if 'lastmod' in meta['sitemap']:
                if meta['sitemap']['lastmod'] == git_lastmod:
                    continue  # already up to date
                meta['sitemap']['lastmod'] = git_lastmod
            else:
                meta['sitemap'].insert(0, 'lastmod', git_lastmod)
        else:
            meta.insert(line_num, 'sitemap', {'lastmod': git_lastmod})
        output = 'new.md'
        if os.path.isfile(output):
            os.remove(output)
        with open(output, 'w') as new, open(post, 'r') as old:
            new.write("---\n")
            yaml.dump(meta, new)
            new.write("---\n")
            # skip the original front matter plus the two '---' fences
            line_num += 2
            lines = old.readlines()
            for line in lines:
                if line_num > 0:
                    line_num -= 1
                    continue
                else:
                    new.write(line)
        shutil.move(output, post)
        count += 1
        print("[INFO] update 'lastmod' for: '{}'".format(post))
    print("[NOTICE] Success! Update all posts's lastmod.\n")
    if count > 0:
        subprocess.call(["git", "add", POSTS_PATH])
        subprocess.call(["git", "commit", "-m",
                         "[Automation] Update lastmod of post(s)."])
def to_yaml(self) -> str:
    """Return the run description serialized as a YAML string."""
    # The ruamel YAML class is imported lazily through the instance hook.
    yaml_cls = self._ruamel_importer()
    dumper = yaml_cls()
    with io.StringIO() as buffer:
        dumper.dump(self.serialize(), stream=buffer)
        return buffer.getvalue()
def dump(self, data, stream=None, **kwargs):
    """Serialize *data* to *stream*.

    When no stream is given, the data is dumped into an in-memory buffer
    and returned as a string instead (a convenience that trades a little
    efficiency for ergonomics).
    """
    to_string = stream is None
    if to_string:
        stream = StringIO()
    YAML.dump(self, data, stream, **kwargs)
    if to_string:
        return stream.getvalue()
class MSPInitializer:
    """Manages the YAML configuration of an analysis run.

    Locates a YAML settings file (a project-local ``config/config.yml``
    or the packaged default), loads it into ``configs``, reads the
    proteins of interest from the packaged go_terms/pathways txt files,
    and drives the file readers via :meth:`read_data`.
    """
    # set all file names that are required
    yml_file_name_tmp = "config_tmp.yml"
    yml_file_name = "config.yml"
    default_yml_name = "ms_analysis_default.yml"
    go_path = "go_terms"
    pathway_path = "pathways"
    # txt files shipped with the package for GO terms and pathways
    possible_gos = sorted([
        x for x in os.listdir(os.path.join(path_package_config, go_path))
        if x.endswith(".txt")
    ])
    possible_pathways = sorted([
        x for x in os.listdir(os.path.join(path_package_config, pathway_path))
        if x.endswith(".txt")
    ])

    def __init__(self, dir_: str, file_path_yml: Optional[str] = None,
                 loglevel=logging.DEBUG):
        """
        Parameters
        ----------
        dir_
            starting directory of the analysis; a trailing "txt"
            component is stripped by the start_dir setter
        file_path_yml
            optional yaml config location; may be "default", "file" or a
            path to a yml/yaml file
        loglevel
            level of the logger
        """
        self.logger = get_logger(self.__class__.__name__, loglevel=loglevel)
        # create a yaml file reader
        self.yaml = YAML()
        # self.yaml.indent(mapping=2, sequence=4, offset=2)
        self.yaml.indent(offset=2)
        self.yaml.default_flow_style = False
        self.yaml.width = 4096
        # attributes that change upon changing the starting dir
        self.configs = {}
        self.reader_data = {}
        self.interesting_proteins, self.go_analysis_gene_names = None, None
        # properties
        self._start_dir = None
        self._file_path_yaml = None
        # set the specified dirs
        self.start_dir = dir_
        if file_path_yml is not None:
            self.file_path_yaml = file_path_yml

    @property
    def start_dir(self):
        # current starting directory of the analysis
        return self._start_dir

    @start_dir.setter
    def start_dir(self, start_dir):
        start_dir = os.path.normpath(start_dir)
        # make sure to be on the right level and set starting dir
        if os.path.split(start_dir)[1] == "txt":
            self.logger.debug("Removing txt ending from path")
            self._start_dir = os.path.split(start_dir)[0]
        else:
            self._start_dir = start_dir
        self.logger.info(f"Starting dir: {self.start_dir}")
        # set all attributes back None that where file specific
        self.configs = {}
        self.reader_data = {}
        # re-resolve the config file relative to the new starting dir
        self.file_path_yaml = "file"

    @property
    def path_config(self):
        # the config directory inside the starting dir
        return os.path.join(self.start_dir, "config")

    @property
    def file_path_yaml(self):
        return self._file_path_yaml

    @file_path_yaml.setter
    def file_path_yaml(self, file_path_yml: str):
        """Resolve the config file location and load its contents.

        Parameters
        ----------
        file_path_yml
            can be either:

            - "default"
            - "file"
            - a path to a yml file

        Raises
        ------
        ValueError
            if no valid value was provided
        FileNotFoundError
            if the file specified by the file_path_yml was not found
        """
        if file_path_yml.lower() == "default":
            self._file_path_yaml = self.get_default_yml_path()
        elif file_path_yml.lower() == "file":
            if self.has_yml_file():
                self._file_path_yaml = os.path.join(
                    self.start_dir, "config", MSPInitializer.yml_file_name)
            else:
                # no project-local config.yml -> fall back to the default
                self._file_path_yaml = self.get_default_yml_path()
        elif file_path_yml.lower().endswith(('.yml', '.yaml')):
            self._file_path_yaml = os.path.normpath(file_path_yml)
        else:
            raise ValueError(
                f"Invalid value provided for yaml file: {file_path_yml}")
        self.logger.debug("yml file location: %s", self._file_path_yaml)
        # load the config from the yml file
        self.logger.info("loading yml file")
        with open(self.file_path_yaml) as f:
            self.configs = self.yaml.load(f)
        self.logger.debug(f"Config file contents: {self.configs}")

    def init_config(self):
        """Creates the directory to save the configuration file and saves
        the configuration."""
        os.makedirs(self.path_config, exist_ok=True)
        self.update_config_file()

    def has_yml_file(self) -> bool:
        """Return True when a config/config.yml exists below start_dir."""
        if not os.path.isdir(self.start_dir):
            return False
        if "config" in os.listdir(self.start_dir):
            self.logger.debug("Found config dir")
            config_dir = os.path.join(self.start_dir, "config")
            if MSPInitializer.yml_file_name in os.listdir(config_dir):
                self.logger.debug("Found config.yml file in config dir")
                return True
        return False

    def get_default_yml_path(self) -> str:
        """Return the path of the default yaml file shipped with the package."""
        self.logger.debug(
            "Loading default yml file from: %s, since no (valid) file was selected",
            path_package)
        return os.path.join(path_package_config,
                            MSPInitializer.default_yml_name)

    def init_interest_from_txt(
            self) -> Tuple[Dict[str, list], Dict[str, list]]:
        """Read the configured pathway and GO txt files.

        Returns
        -------
        a dict mapping pathway name -> protein list and a dict mapping
        GO term name -> protein list
        """
        dict_pathway = {}
        dict_go = {}
        for pathway in self.configs.get("pathways"):
            name, proteins = self.read_config_txt_file(
                MSPInitializer.pathway_path, pathway)
            dict_pathway[name] = proteins
        for go in self.configs.get("go_terms"):
            name, proteins = self.read_config_txt_file(
                MSPInitializer.go_path, go)
            dict_go[name] = proteins
        return dict_pathway, dict_go

    def read_config_txt_file(self, path, file) -> Tuple[str, list]:
        """Parse one packaged txt file into (name, protein list).

        Pathway files carry their display name on the first line followed
        by a separator line; GO files are named after the file itself.
        """
        fullpath = os.path.join(path_package_config, path, file)
        if path == MSPInitializer.pathway_path:
            with open(fullpath) as f:
                name = f.readline().strip()
                f.readline()  # skip the separator line after the name
                proteins = []
                for line in f:
                    proteins.append(line.strip())
        elif path == MSPInitializer.go_path:
            name = file.replace(".txt", "")
            with open(fullpath) as f:
                proteins = []
                for line in f:
                    proteins.append(line.strip())
        else:
            raise ValueError(f"Invalid path: {path}")
        return name, proteins

    def update_config_file(self):
        """Persist the current configs, writing via a tmp file and renaming."""
        # store the config file as tmp
        self.logger.debug("Updating yml settings file")
        yml_file_loc_tmp = os.path.join(self.path_config,
                                        MSPInitializer.yml_file_name_tmp)
        with open(yml_file_loc_tmp, "w") as outfile:
            self.yaml.dump(self.configs, outfile)
        # delete non tmp if exists
        yml_file_loc = os.path.join(self.path_config,
                                    MSPInitializer.yml_file_name)
        if MSPInitializer.yml_file_name in os.listdir(self.path_config):
            os.remove(yml_file_loc)
        # rename to non tmp
        os.rename(yml_file_loc_tmp, yml_file_loc)

    def read_data(self):
        """Instantiate every BaseReader subclass, collect its data and
        configs, then read the proteins of interest and persist configs."""
        for Reader in BaseReader.__subclasses__():
            Reader: Type[BaseReader]  # for IDE hints
            try:
                reader = Reader(self.start_dir,
                                self.configs.get(Reader.name, {}))
                self.configs[str(Reader.name)] = deepcopy(reader.reader_config)
                self.reader_data[Reader.name] = reader.full_data
            except MissingFilesException:
                self.logger.debug("No files found for reader: %s", Reader.name)
        # read all proteins and receptors of interest from the config dir
        self.logger.info("Reading proteins and receptors of interest")
        self.interesting_proteins, self.go_analysis_gene_names = self.init_interest_from_txt(
        )
        self.update_config_file()
class MSPInitializer:
    """
    | An initializer class which is responsible for creating the directory
      to save the default YAML configuration file as well as reading and
      saving the specified settings.
    | The initializer also operates as a means of passing stored
      configurations to the plotter classes.
    """
    # set all file names that are required
    yml_file_name_tmp = "config_tmp.yml"
    yml_file_name = "config.yml"
    default_yml_name = "ms_analysis_default.yml"
    go_path = "go_terms"
    pathway_path = "pathways"
    # txt files shipped with the package for GO terms and pathways
    possible_gos = sorted([
        x for x in os.listdir(os.path.join(path_package_config, go_path))
        if x.endswith(".txt")
    ])
    possible_pathways = sorted([
        x for x in os.listdir(os.path.join(path_package_config, pathway_path))
        if x.endswith(".txt")
    ])

    def __init__(self, path: str, file_path_yml: Optional[str] = None,
                 loglevel=logging.DEBUG):
        """
        Parameters
        ----------
        path
            location where the directory/txt folder to the data can be found.
        file_path_yml
            path to the yaml config file
        loglevel
            level of the logger
        """
        self.logger = get_logger(self.__class__.__name__, loglevel=loglevel)
        # create a yaml file reader
        self.yaml = YAML()
        self.yaml.indent(mapping=2, sequence=4, offset=2)
        # self.yaml.indent(offset=2)
        self.yaml.default_flow_style = False
        self.yaml.width = 4096
        # attributes that change upon changing the starting dir
        #: configurations for the run. also saved configurations for the
        #: reader under the respective reader name
        self.configs = {}
        self.reader_data = {}
        self.interesting_proteins, self.go_analysis_gene_names = None, None
        # properties
        self._start_dir = None
        self._file_path_yaml = None
        # list to store all selectable terms; custom and provided
        self.list_full_gos = []
        self.list_full_pathways = []
        # set the specified dirs
        self.start_dir = path
        if file_path_yml is not None:
            self.file_path_yaml = file_path_yml

    @property
    def start_dir(self):
        # current starting directory of the analysis
        return self._start_dir

    @start_dir.setter
    def start_dir(self, start_dir):
        start_dir = os.path.normpath(start_dir)
        # make sure to be on the right level and set starting dir
        if os.path.split(start_dir)[1] == "txt":
            self.logger.debug("Removing txt ending from path")
            self._start_dir = os.path.split(start_dir)[0]
        else:
            self._start_dir = start_dir
        self.logger.info(f"Starting dir: {self.start_dir}")
        # set all attributes back None that where file specific
        self.configs = {}
        self.reader_data = {}
        # re-resolve the config file relative to the new starting dir
        self.file_path_yaml = "file"
        # see if any custom lists can be found
        self.list_full_gos = []
        try:
            self.list_full_gos += [
                x for x in os.listdir(os.path.join(self._start_dir, "go_terms"))
                if os.path.isfile(os.path.join(self._start_dir, "go_terms", x))
            ]
        except FileNotFoundError:
            # no custom go_terms directory in this project
            pass
        self.list_full_gos += MSPInitializer.possible_gos
        self.list_full_pathways = []
        try:
            self.list_full_pathways += [
                x for x in os.listdir(os.path.join(self._start_dir, "pathways"))
                if os.path.isfile(os.path.join(self._start_dir, "pathways", x))
            ]
        except FileNotFoundError:
            # no custom pathways directory in this project
            pass
        self.list_full_pathways += MSPInitializer.possible_pathways

    @property
    def path_config(self):
        # the config directory inside the starting dir
        return os.path.join(self.start_dir, "config")

    @property
    def file_path_yaml(self):
        """
        Setting the yaml file path will set the configurations of the
        class to the ones specified in the file.

        Note
        -----
        The value can be set to either:

        - "default"
        - "file"
        - a path to a yml file

        Raises
        ------
        ValueError
            if no valid value was provided
        FileNotFoundError
            if the file specified by the file_path_yml was not found
        """
        return self._file_path_yaml

    @file_path_yaml.setter
    def file_path_yaml(self, file_path_yml: str):
        if file_path_yml.lower() == "default":
            self._file_path_yaml = self.get_default_yml_path()
        elif file_path_yml.lower() == "file":
            if self.has_yml_file():
                self._file_path_yaml = os.path.join(
                    self.start_dir, "config", MSPInitializer.yml_file_name)
            else:
                # no project-local config.yml -> fall back to the default
                self._file_path_yaml = self.get_default_yml_path()
        elif file_path_yml.lower().endswith(('.yml', '.yaml')):
            self._file_path_yaml = os.path.normpath(file_path_yml)
        else:
            raise ValueError(
                f"Invalid value provided for yaml file: {file_path_yml}")
        self.logger.debug("yml file location: %s", self._file_path_yaml)
        # load the config from the yml file
        self.logger.info("loading yml file")
        with open(self.file_path_yaml) as f:
            self.configs = self.yaml.load(f)
        self.logger.debug(f"Config file contents: {self.configs}")

    def init_config(self):
        """
        | Creates the directory to save the configuration file if not
          present, updates and saves the configuration.
        | The function is usually applied to ensure that configs are
          provided to the initializer in order to avoid problems
          initializing the file reader with :meth:`read_data`.
        """
        os.makedirs(self.path_config, exist_ok=True)
        self.update_config_file()

    def has_yml_file(self) -> bool:
        """Return True when a config/config.yml exists below start_dir."""
        if not os.path.isdir(self.start_dir):
            return False
        if "config" in os.listdir(self.start_dir):
            self.logger.debug("Found config dir")
            config_dir = os.path.join(self.start_dir, "config")
            if MSPInitializer.yml_file_name in os.listdir(config_dir):
                self.logger.debug("Found config.yml file in config dir")
                return True
        return False

    def get_default_yml_path(self) -> str:
        """Return the path of the default yaml file shipped with the package."""
        self.logger.debug(
            "Loading default yml file from: %s, since 'default' or no (valid) file was selected",
            path_package)
        return os.path.join(path_package_config,
                            MSPInitializer.default_yml_name)

    def init_interest_from_txt(
            self) -> Tuple[Dict[str, list], Dict[str, list]]:
        """Read the configured pathway and GO txt files.

        Returns
        -------
        a dict mapping pathway name -> protein list and a dict mapping
        GO term name -> protein list
        """
        dict_pathway = {}
        dict_go = {}
        for pathway in self.configs.get("pathways"):
            name, proteins = self.read_config_txt_file(pathway)
            dict_pathway[name] = proteins
        for go in self.configs.get("go_terms"):
            name, proteins = self.read_config_txt_file(go, False)
            dict_go[name] = proteins
        return dict_pathway, dict_go

    def read_config_txt_file(self, file,
                             is_pathway: bool = True) -> Tuple[str, list]:
        """Parse one txt file into (name, protein list).

        A custom file below start_dir takes precedence over the packaged
        one of the same name.
        """
        path_full = os.path.join(self.start_dir,
                                 "pathways" if is_pathway else "go_terms",
                                 file)
        if not os.path.isfile(path_full):
            # fall back to the file shipped with the package
            path_full = os.path.join(path_package_config,
                                     "pathways" if is_pathway else "go_terms",
                                     file)
            if not os.path.isfile(path_full):
                raise FileNotFoundError(
                    f"The selected file: {file} cannot be found.")
        name = file.replace(".txt", "")
        with open(path_full) as f:
            proteins = []
            for line in f:
                proteins.append(line.strip())
        return name, proteins

    def update_config_file(self):
        """Persist the current configs, writing via a tmp file and renaming."""
        # store the config file as tmp
        self.logger.debug("Updating yml settings file")
        yml_file_loc_tmp = os.path.join(self.path_config,
                                        MSPInitializer.yml_file_name_tmp)
        with open(yml_file_loc_tmp, "w") as outfile:
            self.yaml.dump(self.configs, outfile)
        # delete non tmp if exists
        yml_file_loc = os.path.join(self.path_config,
                                    MSPInitializer.yml_file_name)
        if MSPInitializer.yml_file_name in os.listdir(self.path_config):
            os.remove(yml_file_loc)
        # rename to non tmp
        os.rename(yml_file_loc_tmp, yml_file_loc)

    def read_data(self):
        """
        | Initiates the file reader by providing the directory and the
          configs to the reader.
        | The configs for the reader are taken from the configs from the
          name of the reader as key. E.g. mqreader.
        | In turn a :class:`~mspypeline.helpers.Utils.DataDict` is
          generated to provide the mapping to the input data
          (*reader_data*) for the further analysis with the
          :ref:`mspypeline plotters <plotters>`.
        """
        for Reader in BaseReader.__subclasses__():
            Reader: Type[BaseReader]  # for IDE hints
            try:
                reader = Reader(self.start_dir,
                                self.configs.get(Reader.name, {}))
                self.configs[str(Reader.name)] = deepcopy(reader.reader_config)
                self.reader_data[Reader.name] = reader.full_data
            except MissingFilesException:
                self.logger.debug("No files found for reader: %s", Reader.name)
        # read all proteins and receptors of interest from the config dir
        self.logger.info("Reading pathway and GO list of interest")
        self.interesting_proteins, self.go_analysis_gene_names = self.init_interest_from_txt(
        )
        self.update_config_file()
def make_config_yaml():
    """
    Process that takes all the default parameters from a string and saves
    them as a config.yaml file.

    Return:
        None
    """
    # NOTE: the indentation in `doc` must be aligned to the far left,
    # otherwise the resulting yaml file looks ugly
    doc = """
# Configuration File

experiment_name: test
experiment_nr: 0

# Setup
do_setup: False  # If True, edit the setup() code for each new dataset
do_make_config: False  # If True, overwrite the config.yaml file with default one.

workers: 16  # nr of cores used in multiprocess. Max=16
seed: 42  # Seed for replication

# The mean and std for using pretrained weights
# Ex: Resnet: mean:[0.485, 0.456, 0.406] and std: [0.229, 0.224, 0.225]
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]

# Image settings
image_size: 512  # size of the square image
n_samples: 3  # nr of images per label

# balance_type; how to balance the labels.
# 0: take n_samples. if not enough, continue. Can lead to imbalanced labels
# 1: take n_samples. if not enough, duplicate images till each label has n_samples images
# 2: sample all labels for an number of images equal to the amount of images in the smallest label
# 3: take n_samples. if not enough, augment images till each label has n_samples images
balance_type: 1

load_ext: 'jpeg'  # png is lossless
save_ext: 'png'  # The extension dictates the compression algorithm

# list of image preprocesses
# augm  # augement dataset. Must be first
# hist
# autocrop
# autocrop_in  # autocrop but with square inside the eye-circle to reduce size
# resize
# minmax  # scales array to [0, 1]
# stdize  # centers a minmax array around 0 with unit variance
# gray
# sift
# pca
preprocess:
    # - augm
    # - hist
    # - autocrop
    # - resize
    # - gray
    # - minmax
    # - sift
    # - pca

# Paths
path: /mnt/Datasets/kaggle_diabetic_retinopathy/  # path to dataset for the project.
path_src_ds: /mnt/Datasets/kaggle_diabetic_retinopathy/0_original/  # path to source dataset
path_dst_ds: /mnt/Datasets/kaggle_diabetic_retinopathy/experiments/  # path to destination
"""
    # round-trip mode preserves the comments when dumping back out
    yaml = YAML(typ='rt')
    yaml.indent(mapping=2, sequence=4, offset=4)
    yaml_doc = yaml.load(doc)
    with open('config.yaml', 'w') as f:
        yaml.dump(yaml_doc, f)
cont = recipe for p in path[:-1]: cont = cont[p] if isinstance(cont, list): for n in range(len(cont)): del cont[n][path[-1]] else: del cont[path[-1]] if 'add' in case: dict_merge(recipe, case['add']) recipe_dir = recipes_folder.mkdir(recipe_data['folder']) with recipe_dir.join('meta.yaml').open('w') as fdes: yaml.dump( recipe, fdes, transform=lambda l: l.replace('#{%', '{%').replace("#{{", "{{")) if 'add_files' in case: for fname, data in case['add_files'].items(): with recipe_dir.join(fname).open('w') as fdes: fdes.write(data) if 'move_files' in case: for src, dest in case['move_files'].items(): src_path = recipe_dir.join(src) if not dest: if os.path.isdir(src_path): shutil.rmtree(src_path) else: os.remove(src_path)
"""Prepares a recipe from recipe_data in recipes_folder""" recipe = deepcopy(recipe_data['meta.yaml']) if 'remove' in case: for remove in utils.ensure_list(case['remove']): path = remove.split('/') cont = recipe for p in path[:-1]: cont = cont[p] if isinstance(cont, list): for n in range(len(cont)): del cont[n][path[-1]] else: del cont[path[-1]] if 'add' in case: dict_merge(recipe, case['add']) recipe_folder = op.join(recipes_folder, recipe_data['folder']) os.mkdir(recipe_folder) with open(op.join(recipe_folder, 'meta.yaml'), "w") as meta_file: yaml.dump(recipe, meta_file, transform=lambda l: l.replace('#{%', '{%').replace("#{{", "{{")) if 'add_files' in case: for fname, data in case['add_files'].items(): with open(op.join(recipe_folder, fname), "w") as out: out.write(data) yield recipe_folder