예제 #1
0
    def __init__(self, gene_source, tf_source=None, alias_source=None,
                 filter_havana=False, protein_coding=False, known_only=False):
        """
        Initializes the annotation containers from the given sources.

        Keyword arguments:
        gene_source -- Gene source annotation (required). It can be:
            * A list (matrix): stored directly as gene_list.
            * A string naming an existing file: loaded through load_gene_list.
            * Any other string: treated as an organism name and resolved to an
                 annotation file through GenomeData(...).get_annotation().
            Any other type leaves gene_list empty.

        tf_source -- TF source annotation (optional, ignored when falsy). It can be:
            * A list whose first element is a list (matrix): stored directly as tf_list.
            * A list of other entries: every entry that is an existing file is used
                 as-is; every other entry is resolved through
                 MotifData().get_mtf_path(). The resulting file list is handed to
                 load_tf_list.

        alias_source -- Alias source annotation (optional, ignored when falsy). It can be:
            * A dictionary: stored directly as alias_dict.
            * A string naming an existing file: loaded through load_alias_dict.
            * Any other string: treated as an organism name and resolved through
                 GenomeData(...).get_gene_alias().

        filter_havana, protein_coding, known_only -- Flags forwarded to load_gene_list.
            NOTE: when gene_source is an existing file path, protein_coding and
            known_only are forced to False regardless of the values passed in.

        Raises:
        TypeError -- If tf_source is truthy but not a list, or if alias_source is
                     truthy but neither a dict nor a str.
        """

        # Class Objects
        self.gene_list = []  # Represents gene annotation.
        self.tf_list = []  # Represents TF PWM annotation.
        self.alias_dict = dict()  # Gene Symbol or other IDs -> ENSEMBL ID
        self.symbol_dict = dict()  # ENSEMBL ID -> Official gene symbol

        # Initializing Required Field - Gene List
        if isinstance(gene_source, list):  # Matrix - used by internal methods.
            self.gene_list = gene_source
        elif isinstance(gene_source, str):
            if os.path.isfile(gene_source):  # Path to an annotation file.
                # Original note: "FTT for TDF True". Explicit files are always
                # loaded without the protein_coding / known_only filters.
                protein_coding = False
                known_only = False
                annotation_path = gene_source
            else:  # Organism name resolved through the data configuration.
                annotation_path = GenomeData(gene_source).get_annotation()
            self.load_gene_list(annotation_path,
                                filter_havana=filter_havana,
                                protein_coding=protein_coding,
                                known_only=known_only)

        # Initializing Optional Field - TF List
        if tf_source:
            if not isinstance(tf_source, list):
                raise TypeError(
                    "tf_source must be a list (matrix, mtf file paths or "
                    "repository names), got {}".format(type(tf_source).__name__))
            if isinstance(tf_source[0], list):  # Matrix - already a tf_list.
                self.tf_list = tf_source
            else:
                motif_data = MotifData()
                # Keep existing files as they are; resolve anything else as a
                # repository name.
                mtf_file_list = [
                    entry if os.path.isfile(entry) else motif_data.get_mtf_path(entry)
                    for entry in tf_source
                ]
                # The return value is not used here; load_tf_list is presumably
                # responsible for filling self.tf_list itself - TODO confirm.
                self.load_tf_list(mtf_file_list)

        # Initializing Optional Field - Alias Dictionary
        if alias_source:
            if isinstance(alias_source, dict):  # Dictionary - used by internal methods.
                self.alias_dict = alias_source
            elif isinstance(alias_source, str):
                if os.path.isfile(alias_source):  # Path to an alias file.
                    self.load_alias_dict(alias_source)
                else:  # Organism name resolved through the data configuration.
                    self.load_alias_dict(GenomeData(alias_source).get_gene_alias())
            else:
                raise TypeError(
                    "alias_source must be a dict or a str, got {}".format(
                        type(alias_source).__name__))
예제 #2
0
    def __init__(self, gene_source, tf_source=None, alias_source=None,
                 filter_havana=False, protein_coding=False, known_only=False):
        """
        Initializes the annotation containers from the given sources.

        Keyword arguments:
        gene_source -- Gene source annotation (required). It can be:
            * A list (matrix): stored directly as gene_list.
            * A string naming an existing file: loaded through load_gene_list.
            * Any other string: treated as an organism name and resolved to an
                 annotation file through GenomeData(...).get_annotation().
            Any other type leaves gene_list empty.

        tf_source -- TF source annotation (optional, ignored when falsy). It can be:
            * A list whose first element is a list (matrix): stored directly as tf_list.
            * A list of other entries: every entry that is an existing file is used
                 as-is; every other entry is resolved through
                 MotifData().get_mtf_path(). tf_list is then set to the value
                 returned by load_tf_list for the resulting file list.

        alias_source -- Alias source annotation (optional, ignored when falsy). It can be:
            * A dictionary: stored directly as alias_dict.
            * A string naming an existing file: loaded through load_alias_dict.
            * Any other string: treated as an organism name and resolved through
                 GenomeData(...).get_gene_alias().

        filter_havana, protein_coding, known_only -- Flags forwarded to load_gene_list.
            NOTE: when gene_source is an existing file path, protein_coding and
            known_only are forced to False regardless of the values passed in.

        Raises:
        TypeError -- If tf_source is truthy but not a list, or if alias_source is
                     truthy but neither a dict nor a str.
        """

        # Class Objects
        self.gene_list = []  # Represents gene annotation.
        self.tf_list = []  # Represents TF PWM annotation.
        self.alias_dict = dict()  # Gene Symbol or other IDs -> ENSEMBL ID
        self.symbol_dict = dict()  # ENSEMBL ID -> Official gene symbol

        # Initializing Required Field - Gene List
        if isinstance(gene_source, list):  # Matrix - used by internal methods.
            self.gene_list = gene_source
        elif isinstance(gene_source, str):
            if os.path.isfile(gene_source):  # Path to an annotation file.
                # Original note: "FTT for TDF True". Explicit files are always
                # loaded without the protein_coding / known_only filters.
                protein_coding = False
                known_only = False
                annotation_path = gene_source
            else:  # Organism name resolved through the data configuration.
                annotation_path = GenomeData(gene_source).get_annotation()
            self.load_gene_list(annotation_path,
                                filter_havana=filter_havana,
                                protein_coding=protein_coding,
                                known_only=known_only)

        # Initializing Optional Field - TF List
        if tf_source:
            if not isinstance(tf_source, list):
                raise TypeError(
                    "tf_source must be a list (matrix, mtf file paths or "
                    "repository names), got {}".format(type(tf_source).__name__))
            if isinstance(tf_source[0], list):  # Matrix - already a tf_list.
                self.tf_list = tf_source
            else:
                motif_data = MotifData()
                # Keep existing files as they are; resolve anything else as a
                # repository name.
                mtf_file_list = [
                    entry if os.path.isfile(entry) else motif_data.get_mtf_path(entry)
                    for entry in tf_source
                ]
                self.tf_list = self.load_tf_list(mtf_file_list)

        # Initializing Optional Field - Alias Dictionary
        if alias_source:
            if isinstance(alias_source, dict):  # Dictionary - used by internal methods.
                self.alias_dict = alias_source
            elif isinstance(alias_source, str):
                if os.path.isfile(alias_source):  # Path to an alias file.
                    self.load_alias_dict(alias_source)
                else:  # Organism name resolved through the data configuration.
                    self.load_alias_dict(GenomeData(alias_source).get_gene_alias())
            else:
                raise TypeError(
                    "alias_source must be a dict or a str, got {}".format(
                        type(alias_source).__name__))
예제 #3
0
    def __init__(self, gene_source, tf_source=None, alias_source=None):
        """
        Initializes AnnotationSet.

        Keyword arguments:
        gene_source -- Gene source annotation. It will be used to create the gene_list
                       element. It can be:
            * A matrix (list of lists): An AnnotationSet will be created based on such
                 matrix.
            * A string representing a gtf file: An AnnotationSet will be created based
                 on such gtf file.
            * A string representing an organism: An AnnotationSet will be created based
                 on the gtf file for that organism in data.config file (resolved
                 through GenomeData(...).get_gencode_annotation()).
            Files are always loaded with filter_havana=False. Any other type leaves
            gene_list empty.

        tf_source -- TF source annotation. The original documentation states that this
                     is later mapped with gene_list; no mapping is performed in this
                     constructor. Ignored when falsy. It can be:
            * A matrix (list of lists): Represents a final tf_list element.
            * A list of mtf files: The tf_list will be created based on all mtf files.
            * A list of repositories: The tf_list will be created based on the mtf files
                 associated with such repositories in data.config (resolved through
                 MotifData().get_mtf_path()).

        alias_source -- Alias dictionary source annotation. Ignored when falsy. It can be:
            * A dictionary: An alias dictionary will be created based on such dictionary.
            * A string representing an alias (txt) file: An alias dictionary will be
                 created based on such txt file.
            * A string representing an organism: An alias dictionary will be created based
                 on the txt file for that organism in data.config file (resolved
                 through GenomeData(...).get_gene_alias()).

        Raises:
        TypeError -- If tf_source is truthy but not a list, or if alias_source is
                     truthy but neither a dict nor a str.
        """

        # Class Objects
        self.gene_list = []  # Represents gene annotation.
        self.tf_list = []  # Represents TF PWM annotation.
        self.alias_dict = dict()  # Gene Symbol or other IDs -> ENSEMBL ID
        self.symbol_dict = dict()  # ENSEMBL ID -> Official gene symbol

        # Initializing Required Field - Gene List
        if isinstance(gene_source, list):  # Matrix - used by internal methods.
            self.gene_list = gene_source
        elif isinstance(gene_source, str):
            if os.path.isfile(gene_source):  # Path to a gtf file.
                annotation_path = gene_source
            else:  # Organism name resolved through the data configuration.
                annotation_path = GenomeData(gene_source).get_gencode_annotation()
            self.load_gene_list(annotation_path, filter_havana=False)

        # Initializing Optional Field - TF List
        if tf_source:
            if not isinstance(tf_source, list):
                raise TypeError(
                    "tf_source must be a list (matrix, mtf file paths or "
                    "repository names), got {}".format(type(tf_source).__name__))
            if isinstance(tf_source[0], list):  # Matrix - already a tf_list.
                self.tf_list = tf_source
            else:
                motif_data = MotifData()
                # Keep existing files as they are; resolve anything else as a
                # repository name.
                mtf_file_list = [
                    entry if os.path.isfile(entry) else motif_data.get_mtf_path(entry)
                    for entry in tf_source
                ]
                self.tf_list = self.load_tf_list(mtf_file_list)

        # Initializing Optional Field - Alias Dictionary
        if alias_source:
            if isinstance(alias_source, dict):  # Dictionary - used by internal methods.
                self.alias_dict = alias_source
            elif isinstance(alias_source, str):
                if os.path.isfile(alias_source):  # Path to a txt alias file.
                    self.load_alias_dict(alias_source)
                else:  # Organism name resolved through the data configuration.
                    self.load_alias_dict(GenomeData(alias_source).get_gene_alias())
            else:
                raise TypeError(
                    "alias_source must be a dict or a str, got {}".format(
                        type(alias_source).__name__))
예제 #4
0
    def __init__(self, gene_source, tf_source=None, alias_source=None):
        """
        Initializes AnnotationSet.

        Keyword arguments:
        gene_source -- Gene source annotation. It will be used to create the gene_list
                       element. It can be:
            * A matrix (list of lists): An AnnotationSet will be created based on such
                 matrix.
            * A string representing a gtf file: An AnnotationSet will be created based
                 on such gtf file.
            * A string representing an organism: An AnnotationSet will be created based
                 on the gtf file for that organism in data.config file (resolved
                 through GenomeData(...).get_gencode_annotation()).
            Files are always loaded with filter_havana=False, protein_coding=True and
            known_only=True. Any other type leaves gene_list empty.

        tf_source -- TF source annotation. The original documentation states that this
                     is later mapped with gene_list; no mapping is performed in this
                     constructor. Ignored when falsy. It can be:
            * A matrix (list of lists): Represents a final tf_list element.
            * A list of mtf files: The tf_list will be created based on all mtf files.
            * A list of repositories: The tf_list will be created based on the mtf files
                 associated with such repositories in data.config (resolved through
                 MotifData().get_mtf_path()).

        alias_source -- Alias dictionary source annotation. Ignored when falsy. It can be:
            * A dictionary: An alias dictionary will be created based on such dictionary.
            * A string representing an alias (txt) file: An alias dictionary will be
                 created based on such txt file.
            * A string representing an organism: An alias dictionary will be created based
                 on the txt file for that organism in data.config file (resolved
                 through GenomeData(...).get_gene_alias()).

        Raises:
        TypeError -- If tf_source is truthy but not a list, or if alias_source is
                     truthy but neither a dict nor a str.
        """

        # Class Objects
        self.gene_list = []  # Represents gene annotation.
        self.tf_list = []  # Represents TF PWM annotation.
        self.alias_dict = dict()  # Gene Symbol or other IDs -> ENSEMBL ID
        self.symbol_dict = dict()  # ENSEMBL ID -> Official gene symbol

        # Initializing Required Field - Gene List
        if isinstance(gene_source, list):  # Matrix - used by internal methods.
            self.gene_list = gene_source
        elif isinstance(gene_source, str):
            if os.path.isfile(gene_source):  # Path to a gtf file.
                annotation_path = gene_source
            else:  # Organism name resolved through the data configuration.
                annotation_path = GenomeData(gene_source).get_gencode_annotation()
            self.load_gene_list(annotation_path, filter_havana=False,
                                protein_coding=True, known_only=True)

        # Initializing Optional Field - TF List
        if tf_source:
            if not isinstance(tf_source, list):
                raise TypeError(
                    "tf_source must be a list (matrix, mtf file paths or "
                    "repository names), got {}".format(type(tf_source).__name__))
            if isinstance(tf_source[0], list):  # Matrix - already a tf_list.
                self.tf_list = tf_source
            else:
                motif_data = MotifData()
                # Keep existing files as they are; resolve anything else as a
                # repository name.
                mtf_file_list = [
                    entry if os.path.isfile(entry) else motif_data.get_mtf_path(entry)
                    for entry in tf_source
                ]
                self.tf_list = self.load_tf_list(mtf_file_list)

        # Initializing Optional Field - Alias Dictionary
        if alias_source:
            if isinstance(alias_source, dict):  # Dictionary - used by internal methods.
                self.alias_dict = alias_source
            elif isinstance(alias_source, str):
                if os.path.isfile(alias_source):  # Path to a txt alias file.
                    self.load_alias_dict(alias_source)
                else:  # Organism name resolved through the data configuration.
                    self.load_alias_dict(GenomeData(alias_source).get_gene_alias())
            else:
                raise TypeError(
                    "alias_source must be a dict or a str, got {}".format(
                        type(alias_source).__name__))