def __init__(self, gene_source, tf_source=None, alias_source=None, filter_havana=False,
             protein_coding=False, known_only=False):
    """
    Initializes the annotation containers (genes, TFs, aliases) from the given sources.

    Keyword arguments:
    gene_source -- Gene source annotation. It is used to create the gene_list element. It can be:
        * A list (matrix): stored as gene_list as-is. Used by internal methods.
        * A string that is a path to an existing file: loaded as a gtf file through load_gene_list.
        * Any other string: treated as an organism; the gtf file is resolved through GenomeData.
      Any other type is ignored and gene_list stays empty.
    tf_source -- TF source annotation (optional). It can be:
        * A matrix (list of lists): stored as tf_list as-is.
        * A list of strings: each entry is either a path to an existing mtf file or a name that is
          resolved to an mtf file through MotifData.get_mtf_path; all files go to load_tf_list.
    alias_source -- Alias dictionary source annotation (optional). It can be:
        * A dictionary: stored as alias_dict as-is. Used by internal methods.
        * A string that is a path to an existing file: loaded through load_alias_dict.
        * Any other string: treated as an organism; the alias file is resolved through GenomeData.
    filter_havana, protein_coding, known_only -- Forwarded to load_gene_list. When gene_source is
        a path to a gtf file, protein_coding and known_only are forced to False (see note below).

    Raises:
    TypeError -- If tf_source is truthy but not a list, or alias_source is truthy but neither a
        dictionary nor a string.
    """

    # Class Objects
    self.gene_list = []  # Represents gene annotation.
    self.tf_list = []  # Represents TF PWM annotation.
    self.alias_dict = dict()  # Gene Symbol or other IDs -> ENSEMBL ID
    self.symbol_dict = dict()  # ENSEMBL ID -> Official gene symbol

    # Initializing Required Field - Gene List
    if isinstance(gene_source, list):  # It can be a matrix - Used by internal methods.
        self.gene_list = gene_source
    elif isinstance(gene_source, str):  # It can be a string.
        if os.path.isfile(gene_source):  # The string may represent a path to a gtf file.
            # FTT for TDF True
            # filter_havana = False
            # NOTE(review): the caller's protein_coding / known_only values are overridden on
            # this branch only; kept as found - confirm this is still intended.
            protein_coding = False
            known_only = False
            self.load_gene_list(gene_source,
                                filter_havana=filter_havana,
                                protein_coding=protein_coding,
                                known_only=known_only)
        else:  # The string may represent an organism which points to a gtf file within data.config.
            genome_data = GenomeData(gene_source)
            self.load_gene_list(genome_data.get_annotation(),
                                filter_havana=filter_havana,
                                protein_coding=protein_coding,
                                known_only=known_only)

    # Initializing Optional Field - TF List
    if tf_source:
        if not isinstance(tf_source, list):
            raise TypeError("tf_source must be a list, got %s" % type(tf_source).__name__)
        if isinstance(tf_source[0], list):  # It can be a matrix
            self.tf_list = tf_source
        else:
            motif_data = MotifData()
            # Each entry is either a path to an mtf file or a name that points to an mtf file
            # within data.config.
            mtf_file_list = [entry if os.path.isfile(entry) else motif_data.get_mtf_path(entry)
                             for entry in tf_source]
            self.load_tf_list(mtf_file_list)

    # Initializing Optional Field - Alias Dictionary
    if alias_source:
        if isinstance(alias_source, dict):  # It can be a dictionary - Used by internal methods.
            self.alias_dict = alias_source
        elif isinstance(alias_source, str):  # It can be a string.
            if os.path.isfile(alias_source):  # The string may represent a path to a txt alias file.
                self.load_alias_dict(alias_source)
            else:  # The string may represent an organism which points to a txt alias file within data.config.
                genome_data = GenomeData(alias_source)
                self.load_alias_dict(genome_data.get_gene_alias())
        else:
            raise TypeError("alias_source must be a dict or a str, got %s" % type(alias_source).__name__)
def __init__(self, gene_source, tf_source=None, alias_source=None, filter_havana=False,
             protein_coding=False, known_only=False):
    """
    Initializes the annotation containers (genes, TFs, aliases) from the given sources.

    Keyword arguments:
    gene_source -- Gene source annotation. It is used to create the gene_list element. It can be:
        * A list (matrix): stored as gene_list as-is. Used by internal methods.
        * A string that is a path to an existing file: loaded as a gtf file through load_gene_list.
        * Any other string: treated as an organism; the gtf file is resolved through GenomeData.
      Any other type is ignored and gene_list stays empty.
    tf_source -- TF source annotation (optional). It can be:
        * A matrix (list of lists): stored as tf_list as-is.
        * A list of strings: each entry is either a path to an existing mtf file or a name that is
          resolved to an mtf file through MotifData.get_mtf_path; tf_list is set to whatever
          load_tf_list returns for those files.
    alias_source -- Alias dictionary source annotation (optional). It can be:
        * A dictionary: stored as alias_dict as-is. Used by internal methods.
        * A string that is a path to an existing file: loaded through load_alias_dict.
        * Any other string: treated as an organism; the alias file is resolved through GenomeData.
    filter_havana, protein_coding, known_only -- Forwarded to load_gene_list. When gene_source is
        a path to a gtf file, protein_coding and known_only are forced to False (see note below).

    Raises:
    TypeError -- If tf_source is truthy but not a list, or alias_source is truthy but neither a
        dictionary nor a string.
    """

    # Class Objects
    self.gene_list = []  # Represents gene annotation.
    self.tf_list = []  # Represents TF PWM annotation.
    self.alias_dict = dict()  # Gene Symbol or other IDs -> ENSEMBL ID
    self.symbol_dict = dict()  # ENSEMBL ID -> Official gene symbol

    # Initializing Required Field - Gene List
    if isinstance(gene_source, list):  # It can be a matrix - Used by internal methods.
        self.gene_list = gene_source
    elif isinstance(gene_source, str):  # It can be a string.
        if os.path.isfile(gene_source):  # The string may represent a path to a gtf file.
            # FTT for TDF True
            # filter_havana = False
            # NOTE(review): the caller's protein_coding / known_only values are overridden on
            # this branch only; kept as found - confirm this is still intended.
            protein_coding = False
            known_only = False
            self.load_gene_list(gene_source,
                                filter_havana=filter_havana,
                                protein_coding=protein_coding,
                                known_only=known_only)
        else:  # The string may represent an organism which points to a gtf file within data.config.
            genome_data = GenomeData(gene_source)
            self.load_gene_list(genome_data.get_annotation(),
                                filter_havana=filter_havana,
                                protein_coding=protein_coding,
                                known_only=known_only)

    # Initializing Optional Field - TF List
    if tf_source:
        if not isinstance(tf_source, list):
            raise TypeError("tf_source must be a list, got %s" % type(tf_source).__name__)
        if isinstance(tf_source[0], list):  # It can be a matrix
            self.tf_list = tf_source
        else:
            motif_data = MotifData()
            # Each entry is either a path to an mtf file or a name that points to an mtf file
            # within data.config.
            mtf_file_list = [entry if os.path.isfile(entry) else motif_data.get_mtf_path(entry)
                             for entry in tf_source]
            # NOTE(review): the sibling loaders (load_gene_list, load_alias_dict) are called
            # without using a return value; if load_tf_list also fills self.tf_list in place
            # and returns None, this assignment would replace the list with None - confirm.
            self.tf_list = self.load_tf_list(mtf_file_list)

    # Initializing Optional Field - Alias Dictionary
    if alias_source:
        if isinstance(alias_source, dict):  # It can be a dictionary - Used by internal methods.
            self.alias_dict = alias_source
        elif isinstance(alias_source, str):  # It can be a string.
            if os.path.isfile(alias_source):  # The string may represent a path to a txt alias file.
                self.load_alias_dict(alias_source)
            else:  # The string may represent an organism which points to a txt alias file within data.config.
                genome_data = GenomeData(alias_source)
                self.load_alias_dict(genome_data.get_gene_alias())
        else:
            raise TypeError("alias_source must be a dict or a str, got %s" % type(alias_source).__name__)
def __init__(self, gene_source, tf_source=None, alias_source=None):
    """
    Initializes AnnotationSet.

    Keyword arguments:
    gene_source -- Gene source annotation. It will be used to create the gene_list element. It can be:
        * A matrix (list of lists): An AnnotationSet will be created based on such matrix.
        * A string representing a gtf file: An AnnotationSet will be created based on such gtf file.
        * A string representing an organism: An AnnotationSet will be created based on the gtf file
          for that organism in data.config file.
      Any other type is ignored and gene_list stays empty.
    tf_source -- TF source annotation. After initialization, this object is mapped with gene_list.
      It can be:
        * A matrix (list of lists): Represents a final tf_list element.
        * A list of mtf files: The tf_list will be created based on all mtf files.
        * A list of repositories: The tf_list will be created based on the mtf files associated
          with such repositories in data.config.
    alias_source -- Alias dictionary source annotation. It can be:
        * A dictionary: An alias dictionary will be created based on such dictionary.
        * A string representing an alias (txt) file: An alias dictionary will be created based on
          such txt file.
        * A string representing an organism: An alias dictionary will be created based on the txt
          file for that organism in data.config file.

    Raises:
    TypeError -- If tf_source is truthy but not a list, or alias_source is truthy but neither a
        dictionary nor a string.
    """

    # Class Objects
    self.gene_list = []  # Represents gene annotation.
    self.tf_list = []  # Represents TF PWM annotation.
    self.alias_dict = dict()  # Gene Symbol or other IDs -> ENSEMBL ID
    self.symbol_dict = dict()  # ENSEMBL ID -> Official gene symbol

    # Initializing Required Field - Gene List
    if isinstance(gene_source, list):  # It can be a matrix - Used by internal methods.
        self.gene_list = gene_source
    elif isinstance(gene_source, str):  # It can be a string.
        if os.path.isfile(gene_source):  # The string may represent a path to a gtf file.
            self.load_gene_list(gene_source, filter_havana=False)
        else:  # The string may represent an organism which points to a gtf file within data.config.
            genome_data = GenomeData(gene_source)
            self.load_gene_list(genome_data.get_gencode_annotation(), filter_havana=False)

    # Initializing Optional Field - TF List
    if tf_source:
        if not isinstance(tf_source, list):
            raise TypeError("tf_source must be a list, got %s" % type(tf_source).__name__)
        if isinstance(tf_source[0], list):  # It can be a matrix
            self.tf_list = tf_source
        else:
            motif_data = MotifData()
            # Each entry is either a path to an mtf file or a name that points to an mtf file
            # within data.config.
            mtf_file_list = [entry if os.path.isfile(entry) else motif_data.get_mtf_path(entry)
                             for entry in tf_source]
            # NOTE(review): the sibling loaders (load_gene_list, load_alias_dict) are called
            # without using a return value; if load_tf_list also fills self.tf_list in place
            # and returns None, this assignment would replace the list with None - confirm.
            self.tf_list = self.load_tf_list(mtf_file_list)

    # Initializing Optional Field - Alias Dictionary
    if alias_source:
        if isinstance(alias_source, dict):  # It can be a dictionary - Used by internal methods.
            self.alias_dict = alias_source
        elif isinstance(alias_source, str):  # It can be a string.
            if os.path.isfile(alias_source):  # The string may represent a path to a txt alias file.
                self.load_alias_dict(alias_source)
            else:  # The string may represent an organism which points to a txt alias file within data.config.
                genome_data = GenomeData(alias_source)
                self.load_alias_dict(genome_data.get_gene_alias())
        else:
            raise TypeError("alias_source must be a dict or a str, got %s" % type(alias_source).__name__)
def __init__(self, gene_source, tf_source=None, alias_source=None):
    """
    Initializes AnnotationSet.

    Keyword arguments:
    gene_source -- Gene source annotation. It will be used to create the gene_list element. It can be:
        * A matrix (list of lists): An AnnotationSet will be created based on such matrix.
        * A string representing a gtf file: An AnnotationSet will be created based on such gtf file.
        * A string representing an organism: An AnnotationSet will be created based on the gtf file
          for that organism in data.config file.
      Gtf files are loaded with filter_havana=False, protein_coding=True and known_only=True.
      Any other type is ignored and gene_list stays empty.
    tf_source -- TF source annotation. After initialization, this object is mapped with gene_list.
      It can be:
        * A matrix (list of lists): Represents a final tf_list element.
        * A list of mtf files: The tf_list will be created based on all mtf files.
        * A list of repositories: The tf_list will be created based on the mtf files associated
          with such repositories in data.config.
    alias_source -- Alias dictionary source annotation. It can be:
        * A dictionary: An alias dictionary will be created based on such dictionary.
        * A string representing an alias (txt) file: An alias dictionary will be created based on
          such txt file.
        * A string representing an organism: An alias dictionary will be created based on the txt
          file for that organism in data.config file.

    Raises:
    TypeError -- If tf_source is truthy but not a list, or alias_source is truthy but neither a
        dictionary nor a string.
    """

    # Class Objects
    self.gene_list = []  # Represents gene annotation.
    self.tf_list = []  # Represents TF PWM annotation.
    self.alias_dict = dict()  # Gene Symbol or other IDs -> ENSEMBL ID
    self.symbol_dict = dict()  # ENSEMBL ID -> Official gene symbol

    # Initializing Required Field - Gene List
    if isinstance(gene_source, list):  # It can be a matrix - Used by internal methods.
        self.gene_list = gene_source
    elif isinstance(gene_source, str):  # It can be a string.
        if os.path.isfile(gene_source):  # The string may represent a path to a gtf file.
            self.load_gene_list(gene_source,
                                filter_havana=False,
                                protein_coding=True,
                                known_only=True)
        else:  # The string may represent an organism which points to a gtf file within data.config.
            genome_data = GenomeData(gene_source)
            self.load_gene_list(genome_data.get_gencode_annotation(),
                                filter_havana=False,
                                protein_coding=True,
                                known_only=True)

    # Initializing Optional Field - TF List
    if tf_source:
        if not isinstance(tf_source, list):
            raise TypeError("tf_source must be a list, got %s" % type(tf_source).__name__)
        if isinstance(tf_source[0], list):  # It can be a matrix
            self.tf_list = tf_source
        else:
            motif_data = MotifData()
            # Each entry is either a path to an mtf file or a name that points to an mtf file
            # within data.config.
            mtf_file_list = [entry if os.path.isfile(entry) else motif_data.get_mtf_path(entry)
                             for entry in tf_source]
            # NOTE(review): the sibling loaders (load_gene_list, load_alias_dict) are called
            # without using a return value; if load_tf_list also fills self.tf_list in place
            # and returns None, this assignment would replace the list with None - confirm.
            self.tf_list = self.load_tf_list(mtf_file_list)

    # Initializing Optional Field - Alias Dictionary
    if alias_source:
        if isinstance(alias_source, dict):  # It can be a dictionary - Used by internal methods.
            self.alias_dict = alias_source
        elif isinstance(alias_source, str):  # It can be a string.
            if os.path.isfile(alias_source):  # The string may represent a path to a txt alias file.
                self.load_alias_dict(alias_source)
            else:  # The string may represent an organism which points to a txt alias file within data.config.
                genome_data = GenomeData(alias_source)
                self.load_alias_dict(genome_data.get_gene_alias())
        else:
            raise TypeError("alias_source must be a dict or a str, got %s" % type(alias_source).__name__)