def _get_schema_source(
    schema_source, piface_dir=namespaces["looper"]["piface_dir"]
):
    """
    Resolve a schema source to a URL or a path.

    URLs and absolute paths are returned unchanged; any other value is
    joined onto ``piface_dir``.

    :param str schema_source: URL, absolute path or relative path of a schema
    :param str piface_dir: directory that relative sources are joined to;
        the default is read from ``namespaces["looper"]["piface_dir"]`` when
        the function is defined
    :return str: the URL, the absolute path, or the joined path
    """
    # Stolen from piface object; should be a better way to do this...
    if is_url(schema_source) or os.path.isabs(schema_source):
        return schema_source
    return os.path.join(piface_dir, schema_source)
def get_asset_dir_contents(rgc, genome, asset, tag):
    """
    Collect the listing of an asset directory.

    The location of the listing is obtained from ``get_datapath_for_genome``
    and may be either a URL or a local file.

    :param refgenconf.RefGenConf rgc: config
    :param str genome: genome name
    :param str asset: asset name
    :param str tag: tag name; a falsy value selects the default tag
    :return list[str]: list of files in the asset directory
    :raise TypeError: if the resolved location is neither a URL nor an
        existing local file
    """
    if not tag:
        # returns 'default' for nonexistent genome/asset; no need to catch
        tag = rgc.get_default_tag(genome, asset)
    query = dict(
        genome=genome,
        file_name=TEMPLATE_ASSET_DIR_CONTENTS.format(asset, tag),
    )
    # Only the location is needed; the second element of the pair is unused.
    path, _remote = get_datapath_for_genome(rgc, query, remote_key="http")

    path_is_url = is_url(path)
    if not path_is_url and not os.path.exists(path):
        raise TypeError(f"Path is neither a valid URL nor an existing file: {path}")

    if path_is_url:
        _LOGGER.debug(f"Asset dir contents path is a URL: {path}")
        dir_contents = send_data_request(url=path)
    else:
        _LOGGER.debug(f"Asset dir contents path is a file: {path}")
        with open(path) as handle:
            dir_contents = load(handle)
    _LOGGER.debug(f"Asset dir contents: {dir_contents}")
    return dir_contents
def load_yaml(filepath):
    """
    Load YAML content from a local file or a URL.

    :param str filepath: path to a local YAML file, or a URL to fetch
    :return dict: parsed data, as produced by ``yaml.safe_load``
    :raise urllib.error.URLError: if the URL cannot be fetched
        (``HTTPError`` is a subclass and propagates unchanged)
    """
    if is_url(filepath):
        _LOGGER.debug(f"Got URL: {filepath}")
        # The f-strings in this function already require Python 3, so the
        # Python 3 location of urlopen is imported directly.
        from urllib.request import urlopen

        # The context manager closes the response even if reading fails.
        with urlopen(filepath) as response:
            data = response.read()  # a `bytes` object
        return yaml.safe_load(data.decode("utf-8"))

    with open(filepath, "r") as f:
        return yaml.safe_load(f)
def copy_or_download_file(input_string, outfolder):
    """
    Build the command that places an input file into an output folder.

    Nothing is executed here: the function only returns the destination
    path and the command text (``wget -O`` for a URL, ``cp`` otherwise).

    :param str input_string: Can be either a URL or a path to a local file
    :param str outfolder: Where to store the result.
    :return str, str: output/result file and command
    """
    destination = os.path.join(outfolder, os.path.basename(input_string))
    # NOTE: the pieces are joined with single spaces and are not
    # shell-quoted, so the caller must cope with special characters.
    if is_url(input_string):
        command = " ".join(["wget -O", destination, input_string])
    else:
        command = " ".join(["cp", input_string, destination])
    return destination, command
def load_remote_registry_path(bulker_config, registry_path, filepath=None):
    """
    Load a crate manifest identified by a registry path.

    The registry path is parsed into namespace, crate and tag; unless an
    explicit ``filepath`` is given, the manifest location is built as
    ``<base>/<namespace>/<crate>[_<tag>].yaml``. The process exits with
    status 1 if the registry path cannot be parsed or the remote manifest
    cannot be fetched.

    :param bulker_config: configuration object; its ``bulker`` section may
        provide ``registry_url``, otherwise ``DEFAULT_BASE_URL`` is used
    :param str registry_path: registry path to parse
    :param str filepath: optional manifest location (URL or local path)
        that overrides the one derived from the registry path
    :return tuple: the loaded manifest (``yacman.YacAttMap``) and the parsed
        registry path variables; a missing namespace is filled in with
        "bulker" in the returned variables
    """
    cratevars = parse_registry_path(registry_path)
    if not cratevars:
        _LOGGER.error(
            "Unable to parse registry path: {}".format(registry_path))
        sys.exit(1)

    # assemble the query string
    if 'registry_url' in bulker_config.bulker:
        base_url = bulker_config.bulker.registry_url
    else:
        base_url = DEFAULT_BASE_URL
    query = cratevars["crate"]
    if cratevars["tag"] != "default":
        query = query + "_" + cratevars["tag"]
    if not cratevars["namespace"]:
        cratevars["namespace"] = "bulker"  # default namespace
    query = cratevars["namespace"] + "/" + query
    # Until we have an API:
    query = query + ".yaml"
    if not filepath:
        filepath = os.path.join(base_url, query)

    if is_url(filepath):
        _LOGGER.debug("Got URL: {}".format(filepath))
        # Only a missing module should trigger the fallback; any other
        # error during import is allowed to surface.
        try:  # python3
            from urllib.request import urlopen
            from urllib.error import HTTPError
        except ImportError:  # python2
            from urllib2 import urlopen
            from urllib2 import URLError as HTTPError
        try:
            response = urlopen(filepath)
        except HTTPError:
            # cratevars is always truthy here (a failed parse exits above),
            # so a fetch failure is always reported and terminates.
            _LOGGER.error(
                "The requested remote manifest '{}' is not found.".format(
                    filepath))
            sys.exit(1)
        # Close the response even if reading fails.
        try:
            data = response.read()  # a `bytes` object
        finally:
            response.close()
        text = data.decode('utf-8')
        manifest_lines = yacman.YacAttMap(yamldata=text)
    else:
        manifest_lines = yacman.YacAttMap(filepath=filepath)

    return manifest_lines, cratevars
def add_protocol_mapping(self, protocol, loc):
    """
    Map a protocol to a pipeline interface location.

    The mapping is stored under ``PROTOMAP_KEY``. A location that is neither
    an existing local path (after ``expandpath``) nor a URL is ignored with
    a warning; replacing an existing mapping is reported at info level.

    :param str protocol: protocol key
    :param str loc: path to an existing pipeline interface file, or a URL
    """
    # A URL cannot be verified on the local filesystem, so only a non-URL
    # location that is missing locally is rejected.
    if not (os.path.exists(expandpath(loc)) or is_url(loc)):
        _LOGGER.warning("Ignoring nonexistent pipeline interface "
                        "location: {}".format(loc))
        return
    if protocol in self[PROTOMAP_KEY]:
        _LOGGER.info("Overwriting existing protocol mapping with: "
                     "{}:{}".format(protocol, loc))
    self[PROTOMAP_KEY].update({protocol: loc})
def get_pipeline_schemas(self, schema_key=INPUT_SCHEMA_KEY):
    """
    Locate the pipeline schema declared in this pipeline interface.

    A URL or an absolute path is returned unchanged. A relative path is
    joined onto the directory of ``self.pipe_iface_file``. When the key is
    absent the result is None; an empty value is returned as is.

    :param str schema_key: where to look for schemas in the pipeline iface
    :return str: URL or path to the pipeline schema file
    """
    source = self[schema_key] if schema_key in self else None
    if not source:
        return source
    _LOGGER.debug("Got schema source: {}".format(source))
    if is_url(source) or os.path.isabs(source):
        return source
    iface_dir = os.path.dirname(self.pipe_iface_file)
    return os.path.join(iface_dir, source)
def _ensure_path_absolute(maybe_relpath, cfg_path):
    """
    Ensure that a possibly relative path is absolute.

    Absolute paths and URLs are returned unchanged. Otherwise environment
    variables and ``~`` are expanded; if the result is still relative it is
    joined onto the directory of the project config file.

    :param str maybe_relpath: path (or URL) to make absolute
    :param str cfg_path: path to the project config file; its directory is
        the anchor for relative paths
    :return str: the URL, the absolute path, or the path joined onto the
        config file's directory
    :raise TypeError: if ``maybe_relpath`` is not a string
    """
    if not isinstance(maybe_relpath, str):
        raise TypeError(
            "Attempting to ensure non-text value is absolute path: {} ({})".
            format(maybe_relpath, type(maybe_relpath)))
    if os.path.isabs(maybe_relpath) or is_url(maybe_relpath):
        _LOGGER.debug("Already absolute")
        return maybe_relpath
    # Maybe we have env vars that make the path absolute?
    expanded = os.path.expanduser(os.path.expandvars(maybe_relpath))
    if os.path.isabs(expanded):
        _LOGGER.debug("Expanded: {}".format(expanded))
        return expanded
    # Set path to an absolute path, relative to project config.
    config_dirpath = os.path.dirname(cfg_path)
    _LOGGER.debug("config_dirpath: {}".format(config_dirpath))
    # Join the expanded form so that variables which resolve to a relative
    # path are not left as literal "$VAR" text in the result.
    abs_path = os.path.join(config_dirpath, expanded)
    _LOGGER.debug("Expanded and/or made absolute: {}".format(abs_path))
    return abs_path