def reset_action(protein_id, key):
    '''
    Flags a pipeline step as 'FAILED' in the status file and passes the
    directories associated with that step to clear_directory.

    @param protein_id: protein whose status entry and directories are reset
    @param key: name of the step (e.g. 'GENE_RETRIEVAL'); a key that is not
                listed below only gets its status entry updated
    '''
    update_entry_in_status_file(protein_id, key, 'FAILED')
    crawler = DirectoryCrawler()

    # (step name, DirectoryCrawler getters) pairs; the getters of the matching
    # step are called in the listed order and each result is cleared.
    step_path_getters = (
        ('GENE_RETRIEVAL', ('get_gene_path',)),
        ('EXP_GENE_RETRIEVAL', ('get_expanded_gene_path',)),
        ('PROTEIN_RETRIEVAL', ('get_protein_path',)),
        ('ENSEMBL_EXON_RETRIEVAL', ('get_exon_ensembl_path',)),
        ('GENEWISE_EXON_RETRIEVAL', ('get_exon_genewise_path', 'get_genewise_path')),
        ('REF_SP_DB_FORMATTING', ('get_database_path',)),
        ('BLASTN_ALIGNMENT', ('get_blastn_path',)),
        ('TBLASTN_ALIGNMENT', ('get_tblastn_path',)),
        ('SW_GENE_ALIGNMENT', ('get_SW_gene_path',)),
        ('SW_EXON_ALIGNMENT', ('get_SW_exon_path',)),
    )

    for step_name, getter_names in step_path_getters:
        if key == step_name:
            for getter_name in getter_names:
                step_directory = getattr(crawler, getter_name)(protein_id)
                clear_directory(step_directory)
            break
def generate_blastn_alignments(protein_id, species_list = None, referenced_species = "Homo_sapiens"):
    '''
    Runs the blastn program for a specified protein and list of species.
    For every species the command is built from <species>.fa in the expanded
    gene folder (input), <referenced_species>.fa in the database folder and
    <species>.blastout in the blastn folder (output).

    @param protein_id
    @param species_list: if provided, runs blastn for this list of species,
                         otherwise runs for species that are missing the blastn output
                         who are determined by .status file in the blastn folder.
    @param referenced_species: species whose database file the input is run against
    @return: True if no command printed anything, False otherwise; in the
             latter case the failed species are passed to
             AlignmentTargetGenerator.set_failed_blastn_targets
    '''
    logger = Logger.Instance()
    alignment_logger = logger.get_logger('alignment')

    crawler = DirectoryCrawler()
    command_generator = CommandGenerator()
    alignment_generator = AlignmentTargetGenerator()

    failed_species_list = []

    # retrieve the blastn targets
    if not species_list:
        species_list = alignment_generator.get_blastn_targets(protein_id)

    for species in species_list:
        species_name = species.strip()

        output_file = "{0}/{1}.blastout".format(crawler.get_blastn_path(protein_id), species_name)
        input_file = "{0}/{1}.fa".format(crawler.get_expanded_gene_path(protein_id), species_name)
        database = "{0}/{1}.fa".format(crawler.get_database_path(protein_id), referenced_species)

        command = command_generator.generate_blastn_command(database, input_file, output_file)
        process = Popen(command, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
        # communicate() closes stdin, drains the merged stdout/stderr pipe and
        # waits for the child, so neither a zombie process nor open pipes remain.
        output = process.communicate()[0]
        if not isinstance(output, str):
            # Pipes yield bytes on Python 3; decode so that the emptiness
            # check and the log message operate on text.
            output = output.decode("utf-8", "replace")

        # Anything printed by the command is treated as a failure.
        if output:
            # The failed command may never have created the output file.
            if os.path.exists(output_file):
                os.remove(output_file)
            alignment_logger.warning("{0}, {1}, BLASTN, {2}".format(protein_id, species_name, output.strip()))
            failed_species_list.append(species_name)

    if failed_species_list:
        alignment_generator.set_failed_blastn_targets(protein_id, failed_species_list)
        return False
    return True
def generate_SW_gene_alignments(protein_id, species_list = None, referenced_species = "Homo_sapiens"):
    '''
    Runs the SW program for a specified protein and list of species,
    using the expanded gene region.
    For every species the command is built from <species>.fa in the expanded
    gene folder (query), <referenced_species>.fa in the database folder
    (target) and <species>.swout in the SW gene folder (output).

    @param protein_id
    @param species_list: if provided, runs SW for this list of species,
                         otherwise runs for species that are missing the SW output
                         who are determined by .status file in the /SW/gene folder.
    @param referenced_species: species whose database file is used as the target
    @return: True if no command printed anything, False otherwise; in the
             latter case the failed species are passed to
             AlignmentTargetGenerator.set_failed_SW_gene_targets
    '''
    logger = Logger.Instance()
    alignment_logger = logger.get_logger('alignment')

    alignment_generator = AlignmentTargetGenerator()
    crawler = DirectoryCrawler()
    command_generator = CommandGenerator()

    if not species_list:
        species_list = alignment_generator.get_SW_gene_targets(protein_id)

    failed_species_list = []

    for species in species_list:
        species_name = species.strip()

        output_file = "{0}/{1}.swout".format(crawler.get_SW_gene_path(protein_id), species_name)
        query_sequence_file = "{0}/{1}.fa".format(crawler.get_expanded_gene_path(protein_id), species_name)
        target_fasta_db_file = "{0}/{1}.fa".format(crawler.get_database_path(protein_id), referenced_species)

        command = command_generator.generate_SW_command(query_sequence_file, target_fasta_db_file, output_file)
        process = Popen(command, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
        # communicate() closes stdin, drains the merged stdout/stderr pipe and
        # waits for the child, so neither a zombie process nor open pipes remain.
        output = process.communicate()[0]
        if not isinstance(output, str):
            # Pipes yield bytes on Python 3; decode so that the emptiness
            # check and the log message operate on text.
            output = output.decode("utf-8", "replace")

        # Anything printed by the command is treated as a failure.
        if output:
            alignment_logger.warning("{0}, {1}, SW GENE, {2}".format(protein_id, species_name, output.strip()))
            failed_species_list.append(species_name)

    # NOTE(review): ".sw_stdout_supressed" is presumably a scratch file that the
    # generated SW command writes into the working directory - confirm against
    # CommandGenerator.generate_SW_command. It is absent when no command ran
    # (e.g. empty target list), so it is only removed when present.
    if os.path.exists(".sw_stdout_supressed"):
        os.remove(".sw_stdout_supressed")

    if failed_species_list:
        alignment_generator.set_failed_SW_gene_targets(protein_id, failed_species_list)
        return False
    return True