Ejemplo n.º 1
0
 def _get_value_for_key(self, key):
     """
     Look up a key directly in the configuration object.

     :param key: the key to look up in the configuration object
     :return: the value stored in the configuration object under 'key'
     :raise ConfigManagerException: if 'key' is not present in the configuration object
     """
     # Guard clause: fail early, naming both the missing key and the configuration file
     if key not in self.__configuration_object:
         raise ConfigManagerException(
             "MISSING configuration key '{}' in configuration file '{}'".format(
                 key, self.__configuration_file))
     return self.__configuration_object[key]
Ejemplo n.º 2
0
 def get_api_server(self):
     """
     Get the Ensembl API server from the configuration object.

     The whole configuration object is written to the debug log before the lookup takes place.
     :return: the value found under the service / Ensembl API / server keys of the configuration object
     :raise ConfigManagerException: if the lookup fails for any reason; the message names the key path, the
     configuration file and the underlying cause
     """
     self._get_logger().debug(
         "get_api_server, from configuration object '{}'".format(
             self._get_configuration_object()))
     try:
         return self._get_configuration_object()[self._CONFIG_KEY_SERVICE][
             self._CONFIG_KEY_ENSEMBL_API][self._CONFIG_KEY_SERVER]
     except Exception as e:
         # Report the underlying cause as well, like the other configuration accessors do, instead of dropping it
         raise ConfigManagerException(
             "MISSING information about Ensembl '{}.{}.{}' API server in configuration file '{}', because of '{}'"
             .format(self._CONFIG_KEY_SERVICE,
                     self._CONFIG_KEY_ENSEMBL_API, self._CONFIG_KEY_SERVER,
                     self._get_configuration_file(), str(e)))
 def is_rewrite_local_path_ensembl_repo(self):
     """
     Find out whether we are required to overwrite the local Ensembl repository or not, in case there is an existing
     one for the same release we are collecting data from.

     The flag is enabled either by a native boolean True or by the exact string "True" in the configuration
     object; any other value disables it.
     :return: True if we have to rewrite it, False otherwise
     :raise ConfigManagerException: if the flag cannot be read from the configuration object
     """
     try:
         flag = self._get_configuration_object() \
             [self._CONFIG_KEY_DATA_DOWNLOADER] \
             [self._CONFIG_KEY_REWRITE_LOCAL_PATH_ENSEMBL_REPO]
         # A native boolean never equals the string "True", so it has to be accepted explicitly; otherwise a
         # configuration that stores a real boolean would silently read as 'do not rewrite'
         return flag is True or flag == "True"
     except Exception as e:
         raise ConfigManagerException(
             "MISSING configuration information '{}.{}' in configuration file '{}', because of '{}'"
             .format(self._CONFIG_KEY_DATA_DOWNLOADER,
                     self._CONFIG_KEY_REWRITE_LOCAL_PATH_ENSEMBL_REPO,
                     self._get_configuration_file(), str(e)))
 def get_ensembl_ftp_base_url(self):
     """
     Return the base URL of the Ensembl FTP service, for instance ftp://ftp.ensembl.org/pub/ .

     The value is read from the configuration object, under the data downloader / Ensembl FTP / base URL keys.
     :return: the configured Ensembl FTP base URL
     :raise ConfigManagerException: if the value cannot be read from the configuration object
     """
     try:
         # Walk down the configuration object one level at a time
         downloader_section = self._get_configuration_object()[self._CONFIG_KEY_DATA_DOWNLOADER]
         ftp_section = downloader_section[self._CONFIG_KEY_ENSEMBL_FTP]
         return ftp_section[self._CONFIG_KEY_BASE_URL]
     except Exception as error:
         message = "MISSING configuration information '{}.{}.{}' in configuration file '{}', because of '{}'".format(
             self._CONFIG_KEY_DATA_DOWNLOADER,
             self._CONFIG_KEY_ENSEMBL_FTP,
             self._CONFIG_KEY_BASE_URL,
             self._get_configuration_file(),
             str(error))
         raise ConfigManagerException(message)
    def get_folder_prefix_ensembl_release(self):
        """
        Return the prefix used for Ensembl release folder names. Ensembl has been publishing releases in folders
        such as 'release-89', in which case the prefix is 'release-'.

        The value is read from the configuration object, under the data downloader / Ensembl FTP / folder prefix
        release keys.
        :return: the configured prefix for Ensembl release folder names
        :raise ConfigManagerException: if the value cannot be read from the configuration object
        """
        try:
            node = self._get_configuration_object()
            # Descend through the nested configuration, one key per level
            for key in (self._CONFIG_KEY_DATA_DOWNLOADER,
                        self._CONFIG_KEY_ENSEMBL_FTP,
                        self._CONFIG_KEY_FOLDER_PREFIX_RELEASE):
                node = node[key]
            return node
        except Exception as error:
            message = "MISSING configuration information '{}.{}.{}' in configuration file '{}', because of '{}'".format(
                self._CONFIG_KEY_DATA_DOWNLOADER,
                self._CONFIG_KEY_ENSEMBL_FTP,
                self._CONFIG_KEY_FOLDER_PREFIX_RELEASE,
                self._get_configuration_file(),
                str(error))
            raise ConfigManagerException(message)
 def get_ensembl_gtf_file_extension(self):
     """
     Return the file extension used for GTF files in Ensembl. It is usually ".gtf", but it is kept as a
     configurable parameter in case Ensembl changes it in the future.

     The value is read from the configuration object, under the data downloader / Ensembl file names / GTF file /
     file extension keys.
     :return: the configured file extension, very likely to be 'gtf'
     :raise ConfigManagerException: if the value cannot be read from the configuration object
     """
     try:
         downloader_section = self._get_configuration_object()[self._CONFIG_KEY_DATA_DOWNLOADER]
         file_names_section = downloader_section[self._CONFIG_KEY_ENSEMBL_FILE_NAMES]
         gtf_section = file_names_section[self._CONFIG_KEY_GTF_FILE]
         return gtf_section[self._CONFIG_KEY_FILE_EXTENSION]
     except Exception as error:
         # TODO - Refactor this code whenever you have time, because a pattern has emerged here
         message = "MISSING configuration information '{}.{}.{}.{}' in configuration file '{}', because of '{}'".format(
             self._CONFIG_KEY_DATA_DOWNLOADER,
             self._CONFIG_KEY_ENSEMBL_FILE_NAMES,
             self._CONFIG_KEY_GTF_FILE,
             self._CONFIG_KEY_FILE_EXTENSION,
             self._get_configuration_file(),
             str(error))
         raise ConfigManagerException(message)
    def get_folder_name_gtf(self):
        """
        Return the name of the sub-folder that holds GTF data for a particular species, e.g. it is used to work
        out the path for downloading the GTF data of a given species from Ensembl.

        The value is read from the configuration object, under the data downloader / Ensembl FTP / folder name GTF
        keys.
        :return: name of the sub-folder as set in the configuration file
        :raise ConfigManagerException: if the value cannot be read from the configuration object
        """
        try:
            configuration = self._get_configuration_object()
            ftp_settings = configuration[self._CONFIG_KEY_DATA_DOWNLOADER][self._CONFIG_KEY_ENSEMBL_FTP]
            return ftp_settings[self._CONFIG_KEY_FOLDER_NAME_GTF]
        except Exception as cause:
            # Key path first, then the configuration file and the reason, in the order the message expects them
            details = (self._CONFIG_KEY_DATA_DOWNLOADER,
                       self._CONFIG_KEY_ENSEMBL_FTP,
                       self._CONFIG_KEY_FOLDER_NAME_GTF,
                       self._get_configuration_file(),
                       str(cause))
            raise ConfigManagerException(
                "MISSING configuration information '{}.{}.{}' in configuration file '{}', because of '{}'"
                .format(*details))
    def get_folder_name_fasta(self):
        """
        Return the name of the folder that holds per-species FASTA data, e.g. it is used to work out the path to
        the FASTA data of a given species on the Ensembl FTP service.

        The value is read from the configuration object, under the data downloader / Ensembl FTP / folder name
        FASTA keys.
        :return: name of the folder as set in the configuration file
        :raise ConfigManagerException: if the value cannot be read from the configuration object
        """
        try:
            current_level = self._get_configuration_object()
            # Follow the key path down through the nested configuration
            for level_key in (self._CONFIG_KEY_DATA_DOWNLOADER,
                              self._CONFIG_KEY_ENSEMBL_FTP,
                              self._CONFIG_KEY_FOLDER_NAME_FASTA):
                current_level = current_level[level_key]
            return current_level
        except Exception as cause:
            message = "MISSING configuration information '{}.{}.{}' in configuration file '{}', because of '{}'".format(
                self._CONFIG_KEY_DATA_DOWNLOADER,
                self._CONFIG_KEY_ENSEMBL_FTP,
                self._CONFIG_KEY_FOLDER_NAME_FASTA,
                self._get_configuration_file(),
                str(cause))
            raise ConfigManagerException(message)
 def get_ensembl_gtf_file_suffixes(self):
     """
     Return the suffixes used for GTF files on Ensembl. GTF files usually carry one of four suffixes, just before
     the file extension: '', 'chr', 'chr_patch_hapl_scaff' and 'abinitio'. They are kept as a configurable
     parameter in case Ensembl changes them in the future (very unlikely).

     The value is read from the configuration object, under the data downloader / Ensembl file names / GTF file /
     file suffixes keys.
     :return: the configured suffixes for GTF files on Ensembl
     :raise ConfigManagerException: if the value cannot be read from the configuration object
     """
     # TODO - Ensembl FTP IS NOT HOMOGENEOUS, find out the superset of suffixes
     try:
         configuration = self._get_configuration_object()
         file_names = configuration[self._CONFIG_KEY_DATA_DOWNLOADER][self._CONFIG_KEY_ENSEMBL_FILE_NAMES]
         gtf_settings = file_names[self._CONFIG_KEY_GTF_FILE]
         return gtf_settings[self._CONFIG_KEY_FILE_SUFFIXES]
     except Exception as cause:
         # TODO - Refactor this code whenever you have time, because a pattern has emerged here
         details = (self._CONFIG_KEY_DATA_DOWNLOADER,
                    self._CONFIG_KEY_ENSEMBL_FILE_NAMES,
                    self._CONFIG_KEY_GTF_FILE,
                    self._CONFIG_KEY_FILE_SUFFIXES,
                    self._get_configuration_file(),
                    str(cause))
         raise ConfigManagerException(
             "MISSING configuration information '{}.{}.{}.{}' in configuration file '{}', because of '{}'"
             .format(*details))
    def get_ensembl_protein_sequence_file_type(self):
        """
        Return the file type string used in the names of Ensembl protein sequence files. It is usually 'pep', and
        it is found at the end of the file name, just before the suffixes, e.g. all or abinitio.

        It is kept as a configuration defined value in case Ensembl decides to change it in the future. The value
        is read from the configuration object, under the data downloader / Ensembl file names / protein sequence
        file / file type keys.
        :return: file type string for Ensembl protein sequence files as specified in the configuration file
        :raise ConfigManagerException: if the value cannot be read from the configuration object
        """
        try:
            node = self._get_configuration_object()
            # Descend through the four nested levels of the configuration
            for key in (self._CONFIG_KEY_DATA_DOWNLOADER,
                        self._CONFIG_KEY_ENSEMBL_FILE_NAMES,
                        self._CONFIG_KEY_PROTEIN_SEQUENCE_FILE,
                        self._CONFIG_KEY_FILE_TYPE):
                node = node[key]
            return node
        except Exception as error:
            # TODO - Refactor this code whenever you have time, because a pattern has emerged here
            message = "MISSING configuration information  '{}.{}.{}.{}' in configuration file '{}', because of '{}'".format(
                self._CONFIG_KEY_DATA_DOWNLOADER,
                self._CONFIG_KEY_ENSEMBL_FILE_NAMES,
                self._CONFIG_KEY_PROTEIN_SEQUENCE_FILE,
                self._CONFIG_KEY_FILE_TYPE,
                self._get_configuration_file(),
                str(error))
            raise ConfigManagerException(message)