예제 #1
0
    def __init__(self, gene_source, tf_source=None, alias_source=None,
                 filter_havana=False, protein_coding=False, known_only=False):
        """Populate the annotation containers from the given sources.

        Args:
            gene_source: Either a list, stored as-is as the gene list, or a
                string. A string naming an existing file is loaded through
                ``load_gene_list``; any other string is handed to
                ``GenomeData`` and the result of ``get_annotation()`` is
                loaded instead. Any other type leaves the gene list empty.
            tf_source: Optional list. If its first element is itself a list,
                the value is stored as-is as the TF list. Otherwise each
                element is kept when it is an existing file, or resolved
                through ``MotifData.get_mtf_path``, and the collected paths
                are passed to ``load_tf_list``. Non-list values are
                currently ignored.
            alias_source: Optional dict, stored as-is as the alias
                dictionary, or a string: an existing file is loaded through
                ``load_alias_dict``, any other string is resolved through
                ``GenomeData.get_gene_alias()``. Other types are currently
                ignored.
            filter_havana: Forwarded to ``load_gene_list``.
            protein_coding: Forwarded to ``load_gene_list`` only when
                ``gene_source`` is not an existing file; the file branch
                always passes ``False``.
            known_only: Handled the same way as ``protein_coding``.
        """
        # Containers; filled in by the sections below.
        self.gene_list = []  # Gene annotation.
        self.tf_list = []  # TF PWM annotation.
        self.alias_dict = {}  # Gene symbol or other IDs -> ENSEMBL ID.
        self.symbol_dict = {}  # ENSEMBL ID -> official gene symbol.

        # Required field: gene list.
        if isinstance(gene_source, list):
            # Ready-made matrix, as passed by internal methods.
            self.gene_list = gene_source
        elif isinstance(gene_source, str):
            if os.path.isfile(gene_source):
                # Path to a gtf file. On this branch the caller's
                # protein_coding / known_only values are deliberately
                # overridden with False (original note: "FTT for TDF True");
                # only filter_havana is honoured.
                self.load_gene_list(gene_source,
                                    filter_havana=filter_havana,
                                    protein_coding=False,
                                    known_only=False)
            else:
                # Organism name that points to a gtf file within data.config.
                organism_data = GenomeData(gene_source)
                self.load_gene_list(organism_data.get_annotation(),
                                    filter_havana=filter_havana,
                                    protein_coding=protein_coding,
                                    known_only=known_only)

        # Optional field: TF list.
        if tf_source:
            if not isinstance(tf_source, list):
                pass  # TODO Throw error.
            elif isinstance(tf_source[0], list):
                # Ready-made matrix.
                self.tf_list = tf_source
            else:
                # Each entry is either a path to an mtf file or a name that
                # points to an mtf file within data.config.
                motif_data = MotifData()
                mtf_paths = [
                    entry if os.path.isfile(entry) else motif_data.get_mtf_path(entry)
                    for entry in tf_source
                ]
                # NOTE(review): the return value of load_tf_list is discarded;
                # presumably the method fills self.tf_list itself - confirm.
                self.load_tf_list(mtf_paths)

        # Optional field: alias dictionary.
        if alias_source:
            if isinstance(alias_source, dict):
                # Ready-made dictionary, as passed by internal methods.
                self.alias_dict = alias_source
            elif isinstance(alias_source, str):
                if os.path.isfile(alias_source):
                    # Path to a txt alias file.
                    self.load_alias_dict(alias_source)
                else:
                    # Organism name that points to a txt alias file within
                    # data.config.
                    organism_data = GenomeData(alias_source)
                    self.load_alias_dict(organism_data.get_gene_alias())
            else:
                pass  # TODO Throw error
예제 #2
0
    def __init__(self, gene_source, tf_source=None, alias_source=None,
                 filter_havana=False, protein_coding=False, known_only=False):
        """Populate the annotation containers from the given sources.

        Args:
            gene_source: Either a list, stored as-is as the gene list, or a
                string. A string naming an existing file is loaded through
                ``load_gene_list``; any other string is handed to
                ``GenomeData`` and the result of ``get_annotation()`` is
                loaded instead. Any other type leaves the gene list empty.
            tf_source: Optional list. If its first element is itself a list,
                the value is stored as-is as the TF list. Otherwise each
                element is kept when it is an existing file, or resolved
                through ``MotifData.get_mtf_path``, and whatever
                ``load_tf_list`` returns for the collected paths becomes the
                TF list. Non-list values are currently ignored.
            alias_source: Optional dict, stored as-is as the alias
                dictionary, or a string: an existing file is loaded through
                ``load_alias_dict``, any other string is resolved through
                ``GenomeData.get_gene_alias()``. Other types are currently
                ignored.
            filter_havana: Forwarded to ``load_gene_list``.
            protein_coding: Forwarded to ``load_gene_list`` only when
                ``gene_source`` is not an existing file; the file branch
                always passes ``False``.
            known_only: Handled the same way as ``protein_coding``.
        """
        # Containers; filled in by the sections below.
        self.gene_list = []  # Gene annotation.
        self.tf_list = []  # TF PWM annotation.
        self.alias_dict = {}  # Gene symbol or other IDs -> ENSEMBL ID.
        self.symbol_dict = {}  # ENSEMBL ID -> official gene symbol.

        # Required field: gene list.
        if isinstance(gene_source, list):
            # Ready-made matrix, as passed by internal methods.
            self.gene_list = gene_source
        elif isinstance(gene_source, str):
            if os.path.isfile(gene_source):
                # Path to a gtf file. On this branch the caller's
                # protein_coding / known_only values are deliberately
                # overridden with False (original note: "FTT for TDF True");
                # only filter_havana is honoured.
                self.load_gene_list(gene_source,
                                    filter_havana=filter_havana,
                                    protein_coding=False,
                                    known_only=False)
            else:
                # Organism name that points to a gtf file within data.config.
                organism_data = GenomeData(gene_source)
                self.load_gene_list(organism_data.get_annotation(),
                                    filter_havana=filter_havana,
                                    protein_coding=protein_coding,
                                    known_only=known_only)

        # Optional field: TF list.
        if tf_source:
            if not isinstance(tf_source, list):
                pass  # TODO Throw error.
            elif isinstance(tf_source[0], list):
                # Ready-made matrix.
                self.tf_list = tf_source
            else:
                # Each entry is either a path to an mtf file or a name that
                # points to an mtf file within data.config.
                motif_data = MotifData()
                mtf_paths = [
                    entry if os.path.isfile(entry) else motif_data.get_mtf_path(entry)
                    for entry in tf_source
                ]
                # NOTE(review): self.tf_list is replaced by whatever
                # load_tf_list returns; if that method fills the list in
                # place and returns None, this assignment would discard the
                # loaded entries - confirm against load_tf_list.
                self.tf_list = self.load_tf_list(mtf_paths)

        # Optional field: alias dictionary.
        if alias_source:
            if isinstance(alias_source, dict):
                # Ready-made dictionary, as passed by internal methods.
                self.alias_dict = alias_source
            elif isinstance(alias_source, str):
                if os.path.isfile(alias_source):
                    # Path to a txt alias file.
                    self.load_alias_dict(alias_source)
                else:
                    # Organism name that points to a txt alias file within
                    # data.config.
                    organism_data = GenomeData(alias_source)
                    self.load_alias_dict(organism_data.get_gene_alias())
            else:
                pass  # TODO Throw error
예제 #3
0
# gd = GenomeData(organism=genome)
# print("\t"+gd.get_annotation())
# print("\tloading GenomeData... succeeds")

# Smoke check for hg38: build an AnnotationSet, count its promoters, then
# create a GenomeData and print its annotation. Each "... succeeds" line is
# reached only if the preceding call returned without raising.
genome = "hg38"
print("Checking " + genome)
# Active configuration: only protein_coding is switched on.
annot = AnnotationSet(genome,
                      filter_havana=False,
                      protein_coding=True,
                      known_only=False)
# Disabled alternative with all three filters switched on:
# annot = AnnotationSet(genome,filter_havana=True,protein_coding=True,known_only=True)
print("\tloading AnnotationSet... succeeds")
promoters = annot.get_promoters()
# Report how many items get_promoters() returned.
print("\tPromoters " + str(len(promoters)))
gd = GenomeData(organism=genome)
# The result of get_annotation() is concatenated onto a str, so a str is
# expected here (presumably the annotation file path - confirm).
print("\t" + gd.get_annotation())
print("\tloading GenomeData... succeeds")

# genome = "mm9"
# print("Checking " + genome)
# annot = AnnotationSet(genome,filter_havana=True,protein_coding=True,known_only=True)
# print("\tloading AnnotationSet... succeeds")
# promoters = annot.get_promoters()
# print("\tPromoters "+str(len(promoters)))
# gd = GenomeData(organism=genome)
# print("\t"+gd.get_annotation())
# print("\tloading GenomeData... succeeds")

# genome = "zv9"
# print("Checking " + genome)
# annot = AnnotationSet(genome,filter_havana=True,protein_coding=True,known_only=True)
예제 #4
0
# print("\tloading AnnotationSet... succeeds")
# promoters = annot.get_promoters()
# print("\tPromoters "+str(len(promoters)))
# gd = GenomeData(organism=genome)
# print("\t"+gd.get_annotation())
# print("\tloading GenomeData... succeeds")

# Smoke check for hg38: build an AnnotationSet, count its promoters, then
# create a GenomeData and print its annotation. Each "... succeeds" line is
# reached only if the preceding call returned without raising.
genome = "hg38"
print("Checking " + genome)
# Active configuration: only protein_coding is switched on.
annot = AnnotationSet(genome,filter_havana=False,protein_coding=True,known_only=False)
# Disabled alternative with all three filters switched on:
# annot = AnnotationSet(genome,filter_havana=True,protein_coding=True,known_only=True)
print("\tloading AnnotationSet... succeeds")
promoters = annot.get_promoters()
# Report how many items get_promoters() returned.
print("\tPromoters "+str(len(promoters)))
gd = GenomeData(organism=genome)
# The result of get_annotation() is concatenated onto a str, so a str is
# expected here (presumably the annotation file path - confirm).
print("\t"+gd.get_annotation())
print("\tloading GenomeData... succeeds")

# genome = "mm9"
# print("Checking " + genome)
# annot = AnnotationSet(genome,filter_havana=True,protein_coding=True,known_only=True)
# print("\tloading AnnotationSet... succeeds")
# promoters = annot.get_promoters()
# print("\tPromoters "+str(len(promoters)))
# gd = GenomeData(organism=genome)
# print("\t"+gd.get_annotation())
# print("\tloading GenomeData... succeeds")

# genome = "zv9"
# print("Checking " + genome)
# annot = AnnotationSet(genome,filter_havana=True,protein_coding=True,known_only=True)