def _get_aln_diff(self, blast_result_files):
    """Compute the query/subject alignment start offset for each BLAST result.

    For every result file, the first 4-field line whose first field is
    ``Query`` and the first 4-field line whose first field is ``Sbjct`` are
    located. The integer in their second field is taken as the alignment
    start of the query (gene) and of the subject (PDB) sequence.

    Args:
        blast_result_files: iterable of result file names; each one is
            loaded through ``self._open_results_file``.

    Returns:
        A list of ``(seq_name, gene_start - pdb_start)`` tuples, one per
        result file. ``seq_name`` is built from the 1st, 4th and 5th
        underscore-separated parts of the result file name.

    Raises:
        IOError: if ``self._open_results_file`` finds no matching file.
        ValueError: if a result file has no qualifying ``Query`` or
            ``Sbjct`` line (previously this case looped forever).
        IndexError: if a file name has fewer than five '_'-separated parts.
    """
    file_handlers = FileHandlers()

    def first_start(rows, label, source):
        # Return the start coordinate from the first alignment row that
        # carries the given label; fail loudly instead of spinning.
        for row in rows:
            if len(row) == 4 and row[0] == label:
                return int(row[1])
        raise ValueError(
            "No '{0}' alignment line found in {1}".format(label, source))

    aln_diff = []
    for result_file in blast_result_files:
        data = self._open_results_file(result_file)
        if data is None:
            # _open_results_file falls through without a return value when
            # no file name matches.
            raise IOError(
                "BLAST result file not found: {0}".format(result_file))

        cleaned_data = []
        for raw_line in data:
            cleaned = file_handlers.clean(raw_line.split(' '))
            # Splitting on a single space leaves empty strings behind for
            # runs of spaces; drop them in one pass.
            cleaned_data.append([field for field in cleaned if field != ''])

        gene_start = first_start(cleaned_data, 'Query', result_file)
        pdb_start = first_start(cleaned_data, 'Sbjct', result_file)

        name_parts = result_file.split('_')
        seq_name = '_'.join((name_parts[0], name_parts[3], name_parts[4]))
        aln_diff.append((seq_name, gene_start - pdb_start))
    return aln_diff
def _open_results_file(self, result_file):
    """Read the lines of the result file whose name equals *result_file*.

    The candidate paths come from ``FileHandlers.search_directory`` filtered
    by ``FileHandlers.find_files(paths, 'out')``; the first candidate whose
    ``get_file_name`` equals *result_file* is read.

    Args:
        result_file: file name to look for (compared against
            ``get_file_name`` of each candidate path).

    Returns:
        The list produced by ``readlines()`` for the first matching file,
        or ``None`` when no candidate matches.
    """
    file_handlers = FileHandlers()
    file_paths = file_handlers.search_directory()
    for out_file in file_handlers.find_files(file_paths, 'out'):
        if file_handlers.get_file_name(out_file) == result_file:
            # The context manager closes the handle even if reading fails.
            with open(out_file, 'r') as handle:
                return handle.readlines()
    return None
def _pairwise_blast(self, query_files, subject_files, protein=False, nucleotide=False):
    """Run a pairwise BLAST for each (query, subject) pair of files.

    Queries and subjects are paired positionally with ``zip``, so extra
    entries in the longer list are ignored. The output of each run is
    written to a file in the current working directory.

    Args:
        query_files: paths of the query FASTA files.
        subject_files: paths of the subject FASTA files.
        protein: when truthy run ``blastp``; otherwise run ``blastn``.
        nucleotide: accepted for interface compatibility; not consulted.

    Returns:
        The list of result file names, in pairing order. Each name is the
        query base name followed by the subject base name (text before the
        first '.') plus ``'_blastp.out'`` -- the suffix is the same
        regardless of which program ran.

    Raises:
        OSError: if the BLAST executable cannot be launched.
    """
    file_handlers = FileHandlers()
    blast_command = 'blastp' if protein else 'blastn'
    blast_result_files = []
    for query, subject in zip(query_files, subject_files):
        blast_result = (
            file_handlers.get_file_name(query).split('.')[0]
            + file_handlers.get_file_name(subject).split('.')[0]
            + '_blastp.out'
        )
        # Pass the arguments as a list and redirect stdout ourselves rather
        # than building a shell string: paths containing spaces or shell
        # metacharacters are then handed to BLAST verbatim.
        cmd = [blast_command, '-query', query, '-subject', subject]
        with open(blast_result, 'w') as out_handle:
            subprocess.call(cmd, stdout=out_handle)
        blast_result_files.append(blast_result)
    return blast_result_files
def _get_fasta_file_paths(self):
    """Collect the query and subject FASTA paths for every annotation.

    Each entry of ``self.sequence_annotations`` is indexed as
    ``[0]`` (used in the ``_chain-`` part of the subject name),
    ``[1]`` (name prefix) and ``[2]`` (key into ``settings.ORGANISM_MAP``
    after upper-casing).

    A FASTA file is a query when its file name up to ``'.fasta'`` equals
    ``<[1]>_<organism_id>``; otherwise it is a subject when its file name
    up to the first ``'.'`` equals ``<[1]>_<self.filename>_chain-<[0]>``.

    Returns:
        A ``(query_files, subject_files)`` tuple of path lists.
        NOTE(review): the two lists are appended to independently, so they
        are only positionally aligned if every annotation matches exactly
        one query and one subject file -- confirm against callers.

    Raises:
        KeyError: if an annotation's organism is not in
            ``settings.ORGANISM_MAP``.
    """
    file_handlers = FileHandlers()
    # The directory scan takes no per-annotation input, so do it once and
    # materialise the result instead of repeating it for every annotation.
    fasta_files = [
        (fasta_file, file_handlers.get_file_name(fasta_file))
        for fasta_file in file_handlers.find_files(
            file_handlers.search_directory(), 'fasta')
    ]

    query_files = []
    subject_files = []
    for annotation in self.sequence_annotations:
        organism_id = settings.ORGANISM_MAP[annotation[2].upper()]
        query_name = annotation[1] + '_' + organism_id
        subject_name = (
            annotation[1] + '_' + self.filename + '_chain-' + annotation[0]
        )
        for fasta_file, file_name in fasta_files:
            if file_name.split('.fasta')[0] == query_name:
                query_files.append(fasta_file)
            elif file_name.split('.')[0] == subject_name:
                subject_files.append(fasta_file)
    return query_files, subject_files
def _mkdir(self):
    """Ask ``FileHandlers`` to make a results folder for this directory.

    The folder name passed to ``make_results_folder`` is the text after the
    last ``'/'`` in ``self.dir_path`` (the whole string when it contains no
    ``'/'``).
    """
    handlers = FileHandlers()
    # Only the final path component is needed, so split once from the right.
    folder_name = self.dir_path.rsplit('/', 1)[-1]
    handlers.make_results_folder(folder_name)