def load_protein_configuration(protein_id, ref_species_dict = None):
    '''
    Loads the data from a description file, and calls the containers
    generating functions to create basic objects.

    For every known species listed in the description file a DataMap is
    registered in the DataMapContainer, and the Protein, Gene, Transcript and
    EnsemblExons objects are instantiated (the exons are loaded as well).
    Failures while adding the data map or loading the exons are logged to the
    'containers' logger and do not stop the processing of other species.

    @param protein_id: id of a single protein
    @param ref_species_dict: dictionary indexed by species name; when None it
                             is obtained from
                             FileUtilities.get_reference_species_dictionary()
    @return: False if check_status_file_no_alignment rejects the protein,
             True once all known species have been processed
    @raise KeyError: if a species from the description file is missing from
                     ref_species_dict
    '''
    if not check_status_file_no_alignment(protein_id):
        return False

    if ref_species_dict is None:
        ref_species_dict = FileUtilities.get_reference_species_dictionary()

    logger = Logger.Instance()
    containers_logger = logger.get_logger('containers')

    data_map_container = DataMapContainer.Instance()
    # NOTE(review): the four containers below are fetched but never used in
    # this function; the calls are kept because the side effects of
    # Instance() are not visible from here -- confirm and clean up.
    protein_container = ProteinContainer.Instance()
    gene_container = GeneContainer.Instance()
    transcript_container = TranscriptContainer.Instance()
    ens_exon_container = EnsemblExonContainer.Instance()

    # NOTE(review): abinitio_proteins is parsed but not processed here.
    (known_proteins, abinitio_proteins) = DescriptionParser().parse_description_file_general_info(protein_id)

    for species_data in known_proteins:
        (species_name,
         spec_protein_id,
         spec_gene_id,
         spec_transcript_id,
         location_type,
         assembly,
         location_id,
         seq_begin,
         seq_end,
         strand) = species_data
        ab_initio = False

        # data map
        data_map_key = (protein_id, species_name)
        data_map = DataMap(spec_protein_id, spec_transcript_id, spec_gene_id,
                           data_map_key, location_type, assembly, location_id,
                           strand, seq_begin, seq_end, ab_initio)
        try:
            data_map_container.add(data_map_key, data_map)
        except (KeyError, TypeError) as e:
            # An exception raised without arguments has an empty args tuple;
            # fall back to the exception itself instead of failing here.
            detail = e.args[0] if e.args else e
            containers_logger.error("{0}, {1}, {2}, error adding to datamap".format(protein_id, species_name, detail))

        # everything else - protein, transcript, gene, ensembl exons
        ref_species = ref_species_dict[species_name]
        # NOTE(review): protein, gene and transcript are constructed but not
        # stored anywhere in this function -- presumably they should be added
        # to their containers; confirm against the callers.
        protein = Protein(spec_protein_id, data_map_key, ref_species)
        gene = Gene(spec_gene_id, data_map_key, ref_species)
        transcript = Transcript(spec_transcript_id, data_map_key, ref_species)

        ens_exons = EnsemblExons(data_map_key, ref_species)
        try:
            ens_exons.load_exons()
        except Exception as e:
            detail = e.args[0] if e.args else e
            containers_logger.error("{0}, {1}, {2}, error loading exons".format(protein_id, species_name, detail))

    # Report success explicitly so it is distinguishable from the early
    # "return False" above (falling off the end would return None).
    return True
def load_exon_configuration (ref_protein_id, ref_species_dict, exon_type):
    '''
    Load exons of a particular type for all available species

    Exons that load successfully and are not empty are added to the
    ExonContainer under the key [ref_protein_id, species]. For every type
    except "ensembl" the exon ordinals are set before the exons are stored.
    Nothing is loaded in this function for the "genewise" type.

    @param ref_protein_id: referent protein id
    @param ref_species_dict: dictionary indexed by species name; when it is
                             empty or None it is obtained from
                             FileUtilities.get_reference_species_dictionary()
    @param exon_type: exon_type: ensembl, genewise, blatn, tblastn, sw_gene, sw_exon
    @return: False if the status file check fails, True once all known
             species have been processed
    @raise KeyError: if a known species is missing from ref_species_dict
    '''
    dc = DescriptionParser()
    exon_container = ExonContainer.Instance()
    # NOTE(review): ens_exon_container is fetched but never used here; the
    # call is kept because the side effects of Instance() are not visible.
    ens_exon_container = EnsemblExonContainer.Instance()

    logger = Logger.Instance()
    containers_logger = logger.get_logger('containers')

    # ensembl and genewise exons are validated with the "no alignment"
    # status check, every other exon type with the regular one.
    if exon_type in ("ensembl", "genewise"):
        status_ok = check_status_file_no_alignment(ref_protein_id)
    else:
        status_ok = check_status_file(ref_protein_id)
    if not status_ok:
        containers_logger.info ("{0},exon_type:{1},check status file -> failed".format(ref_protein_id, exon_type))
        return False

    if not ref_species_dict:
        ref_species_dict = FileUtilities.get_reference_species_dictionary()

    # NOTE(review): abinitio_species is retrieved but not processed here.
    (known_species, abinitio_species) = dc.get_separated_species(ref_protein_id)

    for species in known_species:
        # Looked up before the genewise check, so a missing species raises
        # KeyError for every exon type.
        ref_species = ref_species_dict[species]

        if exon_type == "genewise":
            continue

        if exon_type == "ensembl":
            exons = EnsemblExons ((ref_protein_id, species), ref_species)
        else:
            exons = Exons((ref_protein_id, species), ref_species, exon_type)

        try:
            exon_dict = exons.load_exons()
        except Exception:
            containers_logger.error("{0},{1},{2},error loading exons".format(ref_protein_id, species, exon_type))
            continue

        if not exon_dict:
            continue

        if exon_type != "ensembl":
            exons.set_exon_ordinals()

        data_map_key = [ref_protein_id, species]
        exon_container.add(exon_type, data_map_key, exons)

    # Report success explicitly so it is distinguishable from the early
    # "return False" above (falling off the end would return None).
    return True