def __init__(
        self,
        reference_name,
        annotation_name,
        annotation_version=None,
        decompress_on_download=False,
        copy_local_files_to_cache=False,
        install_string_function=None,
        cache_directory_path=None):
    """
    Record which reference/annotation this cache serves and where its
    files live on disk.

    Parameters
    ----------
    reference_name : str
        Name of reference genome

    annotation_name : str
        Name of annotation database

    annotation_version : str or int, optional
        Version or release of annotation database

    decompress_on_download : bool, optional
        If downloading a .fa.gz file, should we automatically expand it
        into a decompressed FASTA file?

    copy_local_files_to_cache : bool, optional
        If file is on the local file system, should we still copy it
        into the cache?

    install_string_function : fn, optional
        Function which returns an error message with install
        instructions. If not provided then the error tells the user what
        data is missing without install instructions.

    cache_directory_path : str, optional
        Where to place downloaded and temporary files. When omitted (or
        otherwise falsy) the location is derived from the reference
        name, annotation name, annotation version, and the global cache
        directory determined by datacache.

    Notes
    -----
    The ``cache_subdirectory`` attribute is only assigned when the
    location is derived; it is not set when an explicit
    ``cache_directory_path`` is supplied.
    """
    self.reference_name = reference_name
    self.annotation_name = annotation_name
    self.annotation_version = annotation_version

    # The resolved location is kept in the private attribute
    # `_cache_directory_path`; per the original author's note, access via
    # the public `cache_directory_path` (no underscore) is combined with
    # ensuring that the directory actually exists.
    if not cache_directory_path:
        # No explicit location: derive a subdirectory for this
        # reference/annotation and let datacache resolve the full path.
        self.cache_subdirectory = cache_subdirectory(
            reference_name=reference_name,
            annotation_name=annotation_name,
            annotation_version=annotation_version)
        cache_directory_path = datacache.get_data_dir(
            subdir=self.cache_subdirectory)
    self._cache_directory_path = cache_directory_path

    self.decompress_on_download = decompress_on_download
    self.copy_local_files_to_cache = copy_local_files_to_cache
    self.install_string_function = install_string_function
def fm_index_path(genome):
    """
    Return the path of the cached reference peptide index (``.fm`` file)
    for the given genome, creating the cache directory if needed.

    Parameters
    ----------
    genome : object
        Must expose ``species.latin_name`` (formatted with ``%s``) and
        ``release`` (formatted with ``%d``).

    Returns
    -------
    str
        ``<cache_dir>/<latin_name>_<release>_<python major version>.fm``.
        The file itself is not created here, only its parent directory.

    Raises
    ------
    OSError
        If the cache directory cannot be created for any reason other
        than it already existing.
    """
    import errno

    # if $VAXRANK_REF_PEPTIDES_DIR is set, that'll be the location of the cache
    cache_dir = get_data_dir(envkey='VAXRANK_REF_PEPTIDES_DIR')

    # Create the directory unconditionally and tolerate "already exists"
    # instead of checking first: a check-then-create sequence can crash when
    # two processes race to create the same directory. (`exist_ok=True` is
    # avoided because this code still supports Python 2 via `six`.)
    try:
        os.makedirs(cache_dir)
    except OSError as error:
        if error.errno != errno.EEXIST:
            raise

    # The Python major version is part of the file name, presumably because
    # the stored index is not interchangeable between Python 2 and 3.
    index_filename = '%s_%d_%d.fm' % (
        genome.species.latin_name,
        genome.release,
        2 if six.PY2 else 3)
    return os.path.join(cache_dir, index_filename)
def make_blastdb(url, name=None, filename=None, overwrite=False):
    """Download protein sequences and make a BLAST protein database.

    Uses the datacache module for the cache directory and the download,
    and the external ``makeblastdb`` program to build the database.

    Parameters
    ----------
    url : str
        Location of the protein sequence file to download.
    name : str
        Base name of the database inside the datacache data directory.
        Despite the ``None`` default it is required.
    filename : str, optional
        Passed through to ``datacache.fetch_file`` as ``filename``.
    overwrite : bool, optional
        If false and ``<name>.phr`` already exists in the cache directory,
        nothing is downloaded or rebuilt.

    Returns
    -------
    str
        Path prefix of the BLAST database (cache directory joined with
        ``name``).

    Raises
    ------
    TypeError
        If ``name`` is None.
    subprocess.CalledProcessError
        If ``makeblastdb`` exits with a non-zero status.
    """
    # Fail early with a clear message; otherwise os.path.join would raise an
    # opaque error only after the cache directory had been looked up.
    if name is None:
        raise TypeError(
            "make_blastdb() requires a database name, got name=None")

    import datacache

    cachedir = datacache.get_data_dir()
    blastdb = os.path.join(cachedir, name)

    # The .phr file is used as the marker that the database was already built.
    if os.path.exists(blastdb + '.phr') and not overwrite:
        return blastdb

    fasta_path = datacache.fetch_file(
        url, filename=filename, decompress=True, subdir=None)

    # Pass an argument list and no shell, so paths containing spaces or
    # shell metacharacters are handed to makeblastdb verbatim. check_output
    # is kept (output discarded) so the tool's stdout stays captured rather
    # than being echoed to the console.
    subprocess.check_output([
        'makeblastdb',
        '-dbtype', 'prot',
        '-in', fasta_path,
        '-out', blastdb,
    ])
    return blastdb
def __init__(
        self,
        reference_name,
        annotation_name,
        annotation_version=None,
        decompress_on_download=False,
        copy_local_files_to_cache=False,
        install_string_function=None,
        cache_directory_path=None):
    """
    Record which reference/annotation this cache serves and where its
    files live on disk.

    Parameters
    ----------
    reference_name : str
        Name of reference genome

    annotation_name : str
        Name of annotation database

    annotation_version : str or int, optional
        Version or release of annotation database

    decompress_on_download : bool, optional
        If downloading a .fa.gz file, should we automatically expand it
        into a decompressed FASTA file?

    copy_local_files_to_cache : bool, optional
        If file is on the local file system, should we still copy it
        into the cache?

    install_string_function : fn, optional
        Function which returns an error message with install
        instructions. If not provided then the error tells the user what
        data is missing without install instructions.

    cache_directory_path : str, optional
        Where to place downloaded and temporary files. When omitted (or
        otherwise falsy) the location is derived from the reference
        name, annotation name, annotation version, and the global cache
        directory determined by datacache (which is given
        `CACHE_DIR_ENV_KEY` as its environment-variable key).

    Notes
    -----
    The ``cache_subdirectory`` attribute is only assigned when the
    location is derived; it is not set when an explicit
    ``cache_directory_path`` is supplied.
    """
    self.reference_name = reference_name
    self.annotation_name = annotation_name
    self.annotation_version = annotation_version

    # The resolved location is kept in the private attribute
    # `_cache_directory_path`; per the original author's note, access via
    # the public `cache_directory_path` (no underscore) is combined with
    # ensuring that the directory actually exists.
    if not cache_directory_path:
        # No explicit location: derive a subdirectory for this
        # reference/annotation and let datacache resolve the full path.
        self.cache_subdirectory = cache_subdirectory(
            reference_name=reference_name,
            annotation_name=annotation_name,
            annotation_version=annotation_version)
        # If `CACHE_DIR_ENV_KEY` is set, the cache will be saved there
        cache_directory_path = datacache.get_data_dir(
            subdir=self.cache_subdirectory,
            envkey=CACHE_DIR_ENV_KEY)
    self._cache_directory_path = cache_directory_path

    self.decompress_on_download = decompress_on_download
    self.copy_local_files_to_cache = copy_local_files_to_cache
    self.install_string_function = install_string_function