def reset_action(protein_id, key):
    '''
    Records the status 'FAILED' for the step named by key in the status file
    of protein_id, then hands every directory associated with that step to
    clear_directory.

    A key that matches none of the known step names only gets its status
    entry updated; no directory is touched.
    '''
    # (step name, DirectoryCrawler getters whose paths are cleared, in this order)
    step_path_getters = (
        ('GENE_RETRIEVAL', ('get_gene_path',)),
        ('EXP_GENE_RETRIEVAL', ('get_expanded_gene_path',)),
        ('PROTEIN_RETRIEVAL', ('get_protein_path',)),
        ('ENSEMBL_EXON_RETRIEVAL', ('get_exon_ensembl_path',)),
        ('GENEWISE_EXON_RETRIEVAL', ('get_exon_genewise_path', 'get_genewise_path')),
        ('REF_SP_DB_FORMATTING', ('get_database_path',)),
        ('BLASTN_ALIGNMENT', ('get_blastn_path',)),
        ('TBLASTN_ALIGNMENT', ('get_tblastn_path',)),
        ('SW_GENE_ALIGNMENT', ('get_SW_gene_path',)),
        ('SW_EXON_ALIGNMENT', ('get_SW_exon_path',)),
    )

    update_entry_in_status_file(protein_id, key, 'FAILED')
    crawler = DirectoryCrawler()

    for step_name, getter_names in step_path_getters:
        if key == step_name:
            for getter_name in getter_names:
                path_getter = getattr(crawler, getter_name)
                clear_directory(path_getter(protein_id))
            # Step names are unique, so the first match is the only one.
            break
def load_exons(self):
    '''
    Reads the genewise exons of self.species for self.ref_protein_id from
    the fasta file "<exon genewise path>/<species>.fa" and stores them in
    self.exons, keyed by exon number.

    The description of every fasta record is split on whitespace into
    exactly four fields: the exon number, two fields that are ignored here,
    and a "length|start|stop" triple. A record with a different layout
    makes the unpacking raise ValueError.

    Returns self.exons on success, False if the fasta file does not exist
    and None if it exists but cannot be opened. Both failures are reported
    on the 'containters' logger.
    '''
    dc = DirectoryCrawler()
    logger = Logger.Instance()
    # NOTE(review): 'containters' looks like a typo of 'containers', but it is
    # the name the logger is requested under, so it is kept as is - confirm
    # against the logging configuration before renaming.
    container_logger = logger.get_logger('containters')

    exon_file_path = dc.get_exon_genewise_path(self.ref_protein_id)
    exon_file_path += "/%s.fa" % self.species
    if not os.path.isfile(exon_file_path):
        container_logger.error ("{0},{1},genewise,no fasta file for genewise exons.".format(self.ref_protein_id, self.species))
        return False

    # Only the open() call is guarded, so that an IOError raised later while
    # parsing is not misreported as a missing file.
    try:
        exon_file = open(exon_file_path, 'r')
    except IOError:
        container_logger.error("%s,%s,%s" % (self.ref_protein_id, self.species, "No genewise exon file."))
        return None

    # SeqIO.parse yields records lazily, so they have to be consumed while
    # the file is still open; the with block closes the handle afterwards,
    # also when a malformed record raises.
    with exon_file:
        for seq_record in SeqIO.parse(exon_file, "fasta", unambiguous_dna):
            (num, _ir1, _ir2, data) = seq_record.description.split()
            num = int(num)
            # start and stop are handed on as the strings found in the file.
            (_length, start, stop) = data.split('|')
            self.exons[num] = GenewiseExon((self.ref_protein_id, self.species), num, start, stop, seq_record.seq)
    return self.exons
def populate_sequence_exon_genewise(protein_id): ''' Populates the "/PROTEIN_ID/sequence/exon/genewisel/<species>.fa" folder with fasta files containing a list of all the exons for a particular transcript. The data is acquired using the genewise program. This is used for the proteins found with an ab_initio method, that dont have a list of exons on ensembl. ''' logger = Logger.Instance() alignment_logger = logger.get_logger('data_retrieval') directory_crawler = DirectoryCrawler() command_generator = CommandGenerator() exon_genewise_path = directory_crawler.get_exon_genewise_path(protein_id) try: (proteins_known, proteins_abinitio) = DescriptionParser().parse_descr_file(protein_id) except IOError, e: alignment_logger.error("{0}, {1}, , {2}".format(protein_id, 'GENEWISE', e)) return