def __init__(self, gene_source, tf_source=None, alias_source=None,
             filter_havana=False, protein_coding=False, known_only=False):
    """Populate the gene list, TF list and alias dictionary from their sources.

    Args:
        gene_source: Required. A list is stored directly as the gene list
            (used by internal methods). A string naming an existing file is
            loaded through ``load_gene_list``; any other string is handed to
            ``GenomeData`` and the value of its ``get_annotation()`` is
            loaded instead. Any other type leaves the gene list empty.
        tf_source: Optional list. If its first element is a list, the whole
            value is stored as the TF list. Otherwise every element is either
            an existing file path, used as is, or a name resolved with
            ``MotifData.get_mtf_path``; the resulting files are passed to
            ``load_tf_list``.
        alias_source: Optional. A dict is stored directly as the alias
            dictionary (used by internal methods). A string naming an
            existing file is loaded through ``load_alias_dict``; any other
            string is handed to ``GenomeData`` and the value of its
            ``get_gene_alias()`` is loaded instead.
        filter_havana: Forwarded to ``load_gene_list``.
        protein_coding: Forwarded to ``load_gene_list``; forced to ``False``
            when ``gene_source`` is a path to an existing file.
        known_only: Forwarded to ``load_gene_list``; forced to ``False``
            when ``gene_source`` is a path to an existing file.

    Raises:
        TypeError: If a non-empty ``tf_source`` is not a list, or a non-empty
            ``alias_source`` is neither a dict nor a string.
    """
    # Class objects.
    self.gene_list = []  # Represents gene annotation.
    self.tf_list = []  # Represents TF PWM annotation.
    self.alias_dict = dict()  # Gene Symbol or other IDs -> ENSEMBL ID
    self.symbol_dict = dict()  # ENSEMBL ID -> Official gene symbol

    # Required field - gene list.
    if isinstance(gene_source, list):
        # It can be a matrix - used by internal methods.
        self.gene_list = gene_source
    elif isinstance(gene_source, str):
        if os.path.isfile(gene_source):
            # The string is a path to a gtf file.
            # FTT for TDF True
            # filter_havana = False
            protein_coding = False
            known_only = False
            annotation_file = gene_source
        else:
            # The string is an organism which points to a gtf file within
            # data.config.
            annotation_file = GenomeData(gene_source).get_annotation()
        self.load_gene_list(annotation_file,
                            filter_havana=filter_havana,
                            protein_coding=protein_coding,
                            known_only=known_only)

    # Optional field - TF list.
    if tf_source:
        if not isinstance(tf_source, list):
            raise TypeError(
                "tf_source must be a list, got " + type(tf_source).__name__)
        if isinstance(tf_source[0], list):
            # It can be a matrix.
            self.tf_list = tf_source
        else:
            motif_data = MotifData()
            # Each entry is either a path to an mtf file or a name that
            # points to an mtf file within data.config.
            mtf_file_list = [
                entry if os.path.isfile(entry) else motif_data.get_mtf_path(entry)
                for entry in tf_source
            ]
            # NOTE(review): the return value is ignored here; presumably
            # load_tf_list fills self.tf_list itself - confirm.
            self.load_tf_list(mtf_file_list)

    # Optional field - alias dictionary.
    if alias_source:
        if isinstance(alias_source, dict):
            # It can be a dictionary - used by internal methods.
            self.alias_dict = alias_source
        elif isinstance(alias_source, str):
            if os.path.isfile(alias_source):
                # The string is a path to a txt alias file.
                self.load_alias_dict(alias_source)
            else:
                # The string is an organism which points to a txt alias file
                # within data.config.
                genome_data = GenomeData(alias_source)
                self.load_alias_dict(genome_data.get_gene_alias())
        else:
            raise TypeError(
                "alias_source must be a dict or a str, got "
                + type(alias_source).__name__)
def __init__(self, gene_source, tf_source=None, alias_source=None,
             filter_havana=False, protein_coding=False, known_only=False):
    """Populate the gene list, TF list and alias dictionary from their sources.

    Args:
        gene_source: Required. A list is stored directly as the gene list
            (used by internal methods). A string naming an existing file is
            loaded through ``load_gene_list``; any other string is handed to
            ``GenomeData`` and the value of its ``get_annotation()`` is
            loaded instead. Any other type leaves the gene list empty.
        tf_source: Optional list. If its first element is a list, the whole
            value is stored as the TF list. Otherwise every element is either
            an existing file path, used as is, or a name resolved with
            ``MotifData.get_mtf_path``; the resulting files are passed to
            ``load_tf_list`` and its return value becomes the TF list.
        alias_source: Optional. A dict is stored directly as the alias
            dictionary (used by internal methods). A string naming an
            existing file is loaded through ``load_alias_dict``; any other
            string is handed to ``GenomeData`` and the value of its
            ``get_gene_alias()`` is loaded instead.
        filter_havana: Forwarded to ``load_gene_list``.
        protein_coding: Forwarded to ``load_gene_list``; forced to ``False``
            when ``gene_source`` is a path to an existing file.
        known_only: Forwarded to ``load_gene_list``; forced to ``False``
            when ``gene_source`` is a path to an existing file.

    Raises:
        TypeError: If a non-empty ``tf_source`` is not a list, or a non-empty
            ``alias_source`` is neither a dict nor a string.
    """
    # Class objects.
    self.gene_list = []  # Represents gene annotation.
    self.tf_list = []  # Represents TF PWM annotation.
    self.alias_dict = dict()  # Gene Symbol or other IDs -> ENSEMBL ID
    self.symbol_dict = dict()  # ENSEMBL ID -> Official gene symbol

    # Required field - gene list.
    if isinstance(gene_source, list):
        # It can be a matrix - used by internal methods.
        self.gene_list = gene_source
    elif isinstance(gene_source, str):
        if os.path.isfile(gene_source):
            # The string is a path to a gtf file.
            # FTT for TDF True
            # filter_havana = False
            protein_coding = False
            known_only = False
            annotation_file = gene_source
        else:
            # The string is an organism which points to a gtf file within
            # data.config.
            annotation_file = GenomeData(gene_source).get_annotation()
        self.load_gene_list(annotation_file,
                            filter_havana=filter_havana,
                            protein_coding=protein_coding,
                            known_only=known_only)

    # Optional field - TF list.
    if tf_source:
        if not isinstance(tf_source, list):
            raise TypeError(
                "tf_source must be a list, got " + type(tf_source).__name__)
        if isinstance(tf_source[0], list):
            # It can be a matrix.
            self.tf_list = tf_source
        else:
            motif_data = MotifData()
            # Each entry is either a path to an mtf file or a name that
            # points to an mtf file within data.config.
            mtf_file_list = [
                entry if os.path.isfile(entry) else motif_data.get_mtf_path(entry)
                for entry in tf_source
            ]
            # NOTE(review): this overwrites self.tf_list with whatever
            # load_tf_list returns; if that method fills self.tf_list in
            # place and returns None, the list is lost here - confirm.
            self.tf_list = self.load_tf_list(mtf_file_list)

    # Optional field - alias dictionary.
    if alias_source:
        if isinstance(alias_source, dict):
            # It can be a dictionary - used by internal methods.
            self.alias_dict = alias_source
        elif isinstance(alias_source, str):
            if os.path.isfile(alias_source):
                # The string is a path to a txt alias file.
                self.load_alias_dict(alias_source)
            else:
                # The string is an organism which points to a txt alias file
                # within data.config.
                genome_data = GenomeData(alias_source)
                self.load_alias_dict(genome_data.get_gene_alias())
        else:
            raise TypeError(
                "alias_source must be a dict or a str, got "
                + type(alias_source).__name__)
# gd = GenomeData(organism=genome) # print("\t"+gd.get_annotation()) # print("\tloading GenomeData... succeeds") genome = "hg38" print("Checking " + genome) annot = AnnotationSet(genome, filter_havana=False, protein_coding=True, known_only=False) # annot = AnnotationSet(genome,filter_havana=True,protein_coding=True,known_only=True) print("\tloading AnnotationSet... succeeds") promoters = annot.get_promoters() print("\tPromoters " + str(len(promoters))) gd = GenomeData(organism=genome) print("\t" + gd.get_annotation()) print("\tloading GenomeData... succeeds") # genome = "mm9" # print("Checking " + genome) # annot = AnnotationSet(genome,filter_havana=True,protein_coding=True,known_only=True) # print("\tloading AnnotationSet... succeeds") # promoters = annot.get_promoters() # print("\tPromoters "+str(len(promoters))) # gd = GenomeData(organism=genome) # print("\t"+gd.get_annotation()) # print("\tloading GenomeData... succeeds") # genome = "zv9" # print("Checking " + genome) # annot = AnnotationSet(genome,filter_havana=True,protein_coding=True,known_only=True)
# print("\tloading AnnotationSet... succeeds") # promoters = annot.get_promoters() # print("\tPromoters "+str(len(promoters))) # gd = GenomeData(organism=genome) # print("\t"+gd.get_annotation()) # print("\tloading GenomeData... succeeds") genome = "hg38" print("Checking " + genome) annot = AnnotationSet(genome,filter_havana=False,protein_coding=True,known_only=False) # annot = AnnotationSet(genome,filter_havana=True,protein_coding=True,known_only=True) print("\tloading AnnotationSet... succeeds") promoters = annot.get_promoters() print("\tPromoters "+str(len(promoters))) gd = GenomeData(organism=genome) print("\t"+gd.get_annotation()) print("\tloading GenomeData... succeeds") # genome = "mm9" # print("Checking " + genome) # annot = AnnotationSet(genome,filter_havana=True,protein_coding=True,known_only=True) # print("\tloading AnnotationSet... succeeds") # promoters = annot.get_promoters() # print("\tPromoters "+str(len(promoters))) # gd = GenomeData(organism=genome) # print("\t"+gd.get_annotation()) # print("\tloading GenomeData... succeeds") # genome = "zv9" # print("Checking " + genome) # annot = AnnotationSet(genome,filter_havana=True,protein_coding=True,known_only=True)