def save_features(self, ask_filename=True):
    """Write the contents of the features text widget to a file.

    Args:
        ask_filename: If true, the target path is chosen by the user in a
            "save as" dialog; otherwise the file
            ``<work_dir>/<project>/<project>.features`` is used. The flag is
            also forwarded as the third argument of
            ``Aln_basic.write_widget_into_file``.
    """
    if ask_filename:
        features_filename = tkFileDialog.asksaveasfilename(
            filetypes=(("Sequence features", "*.features"), ("All", "*.*")))
        if not features_filename:
            # The dialog yields an empty value when the user cancels it;
            # there is no file to write in that case.
            return
    else:
        project_name = self.host.get_project_name()
        features_filename = os.path.join(
            self.host.settings.work_dir, project_name,
            "%s.features" % project_name)
    Aln_basic.write_widget_into_file(
        self.features.text_widget, features_filename, ask_filename)
def save_TMHMM(self, ask_filename=True):
    """Write the contents of the TMHMM results text widget to a file.

    Args:
        ask_filename: If true, the target path is chosen by the user in a
            "save as" dialog; otherwise the file
            ``<work_dir>/<project>/<project>.TMHMM`` is used. The flag is
            also forwarded as the third argument of
            ``Aln_basic.write_widget_into_file``.
    """
    if ask_filename:
        TMHMM_filename = tkFileDialog.asksaveasfilename(
            filetypes=(("Plain output of the TMHMM", "*.TMHMM"), ("All", "*.*")))
        if not TMHMM_filename:
            # The dialog yields an empty value when the user cancels it;
            # there is no file to write in that case.
            return
    else:
        project_name = self.host.get_project_name()
        TMHMM_filename = os.path.join(
            self.host.settings.work_dir, project_name,
            "%s.TMHMM" % project_name)
    Aln_basic.write_widget_into_file(
        self.TMHMM_results.text_widget, TMHMM_filename, ask_filename)
def align(self):
    """Align the sequences from the sequence input widget with MUSCLE.

    The sequences are written to ``<temp_name>.fasta`` in the working
    directory, MUSCLE is run on that file through the shell, and its output
    (``<temp_name>.aln``) is loaded into the alignment input widget. When the
    ``insert_blocks`` option is set, gap-only ``>BLOCKS`` and ``>SITE``
    records are written in front of the aligned sequences and the widget is
    reloaded. Both temporary files are removed at the end.

    Does nothing except setting an error status when the sequence input
    widget is empty.
    """
    if self.seq_input_frame.text_is_empty():  # No sequences were provided
        self.host.set_status("No sequences were provided to align!", "#FF0000")
        return

    print(" Alignment construction started...")
    (_muscle_name, muscle_path) = Settings.get_program_name(
        self.host.settings.muscle_dir, "muscle")
    work_dir = self.host.settings.work_dir
    unaligned_filename = os.path.join(work_dir, "%s.fasta" % self.host.temp_name)
    Aln_basic.write_widget_into_file(self.seq_input_frame.text_widget, unaligned_filename)
    aligned_filename = os.path.join(work_dir, "%s.aln" % self.host.temp_name)

    self.host.set_status("Alignment")

    # Optional MUSCLE arguments; an option stays empty when its entry is
    # blank or does not hold an acceptable value.
    maxiters_option = ""
    maxiters_value = self.maxiters.get()
    if maxiters_value != "":
        try:
            maxiters_option = "-maxiters %i" % int(maxiters_value)
        except (TypeError, ValueError):
            # int() raises ValueError (not TypeError) for a non-numeric string
            print("Option -maxiters is not an integer and is ignored!")

    gapopen_option = ""
    gapopen_value = self.gapopen.get()
    if gapopen_value != "" and Aln_basic.is_negative_float(gapopen_value, "-gapopen"):
        gapopen_option = "-gapopen %s" % gapopen_value

    gapextend_option = ""
    gapextend_value = self.gapextend.get()
    if gapextend_value != "" and Aln_basic.is_negative_float(gapextend_value, "-gapextend"):
        gapextend_option = "-gapextend %s" % gapextend_value

    muscle_command = "%s -in %s -out %s %s %s %s" % (
        muscle_path, unaligned_filename, aligned_filename,
        maxiters_option, gapopen_option, gapextend_option)
    print("Muscle command to be ran:")
    print(muscle_command)
    if not self.host.verbose.get():
        # Silence the program; os.devnull is "nul" on Windows and
        # "/dev/null" on POSIX, so no stray file called "nul" is created.
        muscle_command = "%s 1> %s 2> %s" % (muscle_command, os.devnull, os.devnull)
    os.system(muscle_command)

    Aln_basic.read_widget_from_file(self.aln_input_frame.text_widget, aligned_filename)

    if self.insert_blocks.get():  # Empty sequence >BLOCKS should be added
        curr_seqs = Aln_basic.read_fasta_from_strings(self.aln_input_frame.get_strings())
        # Without any sequence there is no alignment length to size the gap
        # lines with (presumably the alignment run produced nothing).
        if curr_seqs:
            gap_line = "-" * len(curr_seqs[0].sequence)
            self.aln_input_frame.text_widget.delete(1.0, tkinter.END)
            with open(aligned_filename, "w") as upd_aln_file:
                upd_aln_file.write(">BLOCKS\n")
                upd_aln_file.write("%s\n\n" % gap_line)
                upd_aln_file.write(">SITE\n")
                upd_aln_file.write("%s\n\n" % gap_line)
                for s in curr_seqs:
                    s.print_fasta(upd_aln_file, 60)
            Aln_basic.read_widget_from_file(self.aln_input_frame.text_widget, aligned_filename)

    self.host.set_status("Ready")
    os.remove(unaligned_filename)
    os.remove(aligned_filename)
    print(" [..DONE..]")
def save_IDs(self, ask_filename = True):
    """Write the protein IDs from the IDs widget to a file, optionally converted.

    The conversion depends on the value of ``self.ID_save_mode``:

    * ``"No change"`` - the widget content is written as it is;
    * ``"fix gi-gi"`` and ``"fix & ->"`` - only the part of each ID before
      the first ``-`` is kept;
    * ``" -> gi "`` and ``"fix & ->"`` - IDs found in the tables returned by
      ``udav_soft.read_protein_table_info`` are replaced by the mapped value
      (the second table takes precedence over the first one).

    Converted IDs are placed into the widget only for the duration of the
    write; afterwards the widget gets its original (stripped) text back, even
    if the conversion or the write fails.

    Args:
        ask_filename: If true, the target path is chosen by the user in a
            "save as" dialog; otherwise ``<work_dir>/<project>/<project>.ids``
            is used. The flag is also forwarded as the third argument of
            ``Aln_basic.write_widget_into_file``.
    """
    if ask_filename:
        ids_filename = tkFileDialog.asksaveasfilename(
            filetypes = (("List of protein IDs", "*.ids"), ("All", "*.*")))
        if not ids_filename:
            # Dialog was cancelled: nothing to write, widget is left untouched
            return
    else:
        project_name = self.host.get_project_name()
        ids_filename = os.path.join(self.host.settings.work_dir, project_name,
                                    "%s.ids" % project_name)

    curr_ID_save_mode = self.ID_save_mode.get()
    curr_text = self.IDs.text_widget.get(1.0, tkinter.END).strip()
    try:
        if curr_ID_save_mode != "No change":
            print(" IDs will be converted before writing!")
            to_gi = curr_ID_save_mode in (" -> gi ", "fix & ->")
            fix = curr_ID_save_mode in ("fix gi-gi", "fix & ->")

            locus_to_gi = dict()
            id_to_gi = dict()
            if to_gi:
                self.host.set_status("Working")
                import udav_soft
                try:
                    # FIX: (version 1.0) if <settings.table_filename> is not
                    # given, nothing bad will happen
                    (locus_to_gi, id_to_gi, _unused_a, _unused_b) = udav_soft.read_protein_table_info(
                        self.host.settings.table_filename, True)
                except AttributeError:
                    print(" [ERROR]: Table with assignment of GI to other types of IDs was not given to the script!")
                    print(" Please check 'table_filename' option in the <settings.ini> file")
                except OSError:
                    print(" [ERROR]: Table with assignment of GI to other types of IDs '%s' not found!" % self.host.settings.table_filename)
                self.host.set_status("Ready")

            converted = []
            for line in curr_text.split("\n"):
                curr_id = line.strip()
                if fix:
                    curr_id = curr_id.split("-", 1)[0]
                new_id = curr_id
                if to_gi:
                    if curr_id in locus_to_gi:
                        new_id = locus_to_gi[curr_id]
                    if curr_id in id_to_gi:  # wins over locus_to_gi
                        new_id = id_to_gi[curr_id]
                converted.append(new_id)

            self.IDs.text_widget.delete(1.0, tkinter.END)
            self.IDs.text_widget.insert(tkinter.END, "\n".join(converted).strip())

        Aln_basic.write_widget_into_file(self.IDs.text_widget, ids_filename, ask_filename)
    finally:
        # Always give the user's own text back to the widget
        self.IDs.text_widget.delete(1.0, tkinter.END)
        self.IDs.text_widget.insert(tkinter.END, curr_text)
def hmmsearch_profile_database(self, profile_database, database_type):
    """Start an ``hmmscan`` run of the 'pure' sequences against a profile database.

    The content of the ``pure`` widget is written to
    ``<project>.pure`` inside the project directory and ``hmmscan`` is
    launched on it without waiting for completion. The two output files are
    appended to ``self.host.pending_filenames`` and the host's check button
    is enabled.

    If the project directory does not exist, the user is asked whether it
    should be created; a negative answer aborts the analysis.

    Args:
        profile_database: Value passed to ``hmmscan`` as the profile
            database argument.
        database_type: Label used to build the output file names
            ``<project>.<database_type>_out`` and
            ``<project>.<database_type>_table``.
    """
    project_name = self.host.get_project_name()
    curr_project_dir = os.path.join(self.host.settings.work_dir, project_name)
    if not os.path.isdir(curr_project_dir):
        dir_answer = tkMessageBox.askyesno("Create directory and start analysis?", "This type of analysis cannot be done if the project directory is not created. Do you want to create the following directory and continue?\n%s" % curr_project_dir)
        if not dir_answer:
            return
        print(" Creating project directory de novo: '%s'" % curr_project_dir)
        os.mkdir(curr_project_dir)

    (_hmmscan_name, hmmscan_path) = Settings.get_program_name(
        self.host.settings.hmmer_dir, "hmmscan")

    pure_filename = os.path.join(curr_project_dir, "%s.pure" % project_name)
    Aln_basic.write_widget_into_file(self.pure.text_widget, pure_filename)
    result_filename = os.path.join(
        curr_project_dir, "%s.%s_out" % (project_name, database_type))
    table_filename = os.path.join(
        curr_project_dir, "%s.%s_table" % (project_name, database_type))

    args = [hmmscan_path, "--domtblout", table_filename, "-o", result_filename,
            profile_database, pure_filename]
    # The process is never waited for and its streams are never read, so
    # they are sent to the null device: a pipe that nobody drains can fill
    # up and block the child. The child keeps its own copy of the handle,
    # so closing ours right after the launch is safe.
    with open(os.devnull, "w") as devnull:
        subprocess.Popen(args, stderr = devnull, stdout = devnull)

    self.host.pending_filenames.append(result_filename)
    self.host.pending_filenames.append(table_filename)
    self.host.enable_check_button()
def obtain_features(self):
    """Run ``obtain_features.py`` and load its results into the GUI.

    Input for the script is dumped into temporary files in the working
    directory: the hmm results widget selected by ``self.feature_mode``
    ("COG", otherwise Pfam), the ``fixed`` widget of the parse tab and, when
    not empty, the TMHMM results widget. After the script has run, the
    ``<temp_name>.features`` file is loaded into the features widget and, if
    a ``<temp_name>.domain_info`` file exists, its content is passed to
    ``self.host.load_domain_info``.
    """
    print(" Obtaining features...")
    work_dir = self.host.settings.work_dir
    temp_name = self.host.temp_name

    if self.feature_mode.get() == "COG":
        hmm_widget = self.hmmresults_COG.text_widget
    else:
        hmm_widget = self.hmmresults_Pfam.text_widget
    domain_filename = os.path.join(work_dir, "%s.domain_table" % temp_name)
    domain_not_empty = Aln_basic.write_widget_into_file(hmm_widget, domain_filename)

    fixed_filename = os.path.join(work_dir, "%s.fixed" % temp_name)
    dom_filename = "%s.domain_info" % temp_name
    features_basename = "%s.features" % temp_name
    Aln_basic.write_widget_into_file(self.host.parse_tab.fixed.text_widget, fixed_filename)
    obtain_features_path = os.path.join(self.host.settings.script_dir, "obtain_features.py")

    # Optional inputs; only base names are given because the working
    # directory is passed separately through -w.
    data_for_features = ""
    if domain_not_empty:
        data_for_features += "-p %s" % os.path.basename(domain_filename)
    if not self.TMHMM_results.text_is_empty():
        TM_filename = os.path.join(work_dir, "%s.TMHMM" % temp_name)
        Aln_basic.write_widget_into_file(self.TMHMM_results.text_widget, TM_filename)
        data_for_features += " -t %s" % os.path.basename(TM_filename)

    self.host.set_status("Working")
    command = "%s -i %s -w %s -o %s %s -e %s -f %s -d %s" % (
        obtain_features_path, os.path.basename(fixed_filename), work_dir,
        features_basename, data_for_features,
        self.evalue_threshold.get(), self.overlap_threshold.get(), dom_filename)
    if not self.host.verbose.get():
        # os.devnull is "nul" on Windows and "/dev/null" on POSIX, so the
        # silent mode does not leave a file called "nul" behind elsewhere.
        command = "%s 1> %s 2> %s" % (command, os.devnull, os.devnull)
    os.system(command)
    self.host.set_status("Ready")

    Aln_basic.read_widget_from_file(
        self.features.text_widget, os.path.join(work_dir, features_basename))

    dom_filename_full = os.path.join(work_dir, dom_filename)
    # NOTE(review): a domain_info file left over from an earlier run with the
    # same temp_name would also be picked up here - confirm whether the
    # script always rewrites it.
    if os.path.isfile(dom_filename_full):  # File was created (non-empty domains)
        domain_dict = Aln_basic.read_domain_info_file(dom_filename_full)
        self.host.load_domain_info(domain_dict)  # Loading domain info into the info tab
    print(" [..DONE..]")
def check_input(self):
    """Validate the alignment in the alignment input widget and report via the status bar.

    The widget content is written to ``<temp_name>.aln`` in the working
    directory and read with ``udav_base.read_alignment``. The status is set
    to:

    * an error, when the widget is empty;
    * the string returned by ``read_alignment``, when it returns a string
      instead of a list of sequences;
    * a warning, when a protein ID occurs more than once or a sequence name
      (other than ``BLOCKS`` / ``SITE``) is not of the form ``a|b`` or
      ``a|b|c`` (details are printed to the console);
    * a warning about a corrupted alignment, when an ``IndexError`` is
      raised while reading or checking;
    * ``"OK"`` otherwise.

    The temporary file is removed in every case.
    """
    print(" Checking input alignment...")
    if self.aln_input_frame.text_is_empty():  # No alignment is provided
        self.host.set_status("No alignment is provided!", "#FF0000")
    else:
        aligned_filename = os.path.join(self.host.settings.work_dir, "%s.aln" % self.host.temp_name)
        Aln_basic.write_widget_into_file(self.aln_input_frame.text_widget, aligned_filename)
        import udav_base

        # Raw strings: "\|" is not a valid escape in an ordinary literal.
        # Compiled once instead of on every loop iteration.
        my_format_re = re.compile(r"^[^\|]+\|[^\|]+\|[^\|]+$")
        my_format_simple_re = re.compile(r"^[^\|]+\|[^\|]+$")
        try:
            seq_list = udav_base.read_alignment(aligned_filename)
            if isinstance(seq_list, str):
                # This means that at least one sequence in alignment differs
                # in length from other
                self.host.set_status(seq_list, "#FF0000")
            else:
                status_OK = True
                id_to_name = dict()
                for s in seq_list:
                    s.remove_limits(False, False)
                    if s.ID in id_to_name:  # Identical protein IDs detected
                        status_OK = False
                        print(" [..WARNING..] Identical protein ID detected: '%s'" % s.ID)
                    id_to_name[s.ID] = s.name

                    name_is_special = s.name in ("BLOCKS", "SITE")
                    name_is_my_format = (my_format_re.match(s.name)
                                         or my_format_simple_re.match(s.name))
                    if not name_is_my_format and not name_is_special:
                        status_OK = False
                        print(" [..WARNING..] This name can fail a purification step. Consider 'My' format instead")
                        print(" Current name: '%s'" % s.name)
                        print(" 'My' format example: 'ID|smth|organism' or 'ID|organism'")
                if status_OK:
                    self.host.set_status("OK")
                else:
                    self.host.set_status("Alignment has problems with names format, check console for details", "#888800")
        except IndexError:
            self.host.set_status("Alignment is corrupted; check that it is in FASTA format!", "#888800")
        finally:
            # Do not leave the temporary file behind, whatever happened above
            os.remove(aligned_filename)
    print(" [..DONE..]")
def save_sequence_sample(self):
    """Store the sequence input widget as ``<project>.sample`` in the project directory."""
    project_dir = os.path.join(self.host.settings.work_dir,
                               self.host.get_project_name())
    sample_name = "%s.sample" % self.host.get_project_name()
    Aln_basic.write_widget_into_file(
        self.seq_input_frame.text_widget,
        os.path.join(project_dir, sample_name),
        False,
    )
def save_alignment(self):
    """Store the alignment input widget as ``<project>.aln`` in the project directory."""
    project_dir = os.path.join(self.host.settings.work_dir,
                               self.host.get_project_name())
    alignment_name = "%s.aln" % self.host.get_project_name()
    Aln_basic.write_widget_into_file(
        self.aln_input_frame.text_widget,
        os.path.join(project_dir, alignment_name),
        False,
    )
def save_pure(self, ask_filename = True):
    """Write the contents of the ``pure`` text widget to a file.

    Args:
        ask_filename: If true, the target path is chosen by the user in a
            "save as" dialog; otherwise the file
            ``<work_dir>/<project>/<project>.pure`` is used. The flag is
            also forwarded as the third argument of
            ``Aln_basic.write_widget_into_file``.
    """
    if ask_filename:
        pure_filename = tkFileDialog.asksaveasfilename(
            filetypes = (("Non-aligned sequences with pure names (fasta)", "*.pure"), ("All", "*.*")))
        if not pure_filename:
            # The dialog yields an empty value when the user cancels it;
            # there is no file to write in that case.
            return
    else:
        project_name = self.host.get_project_name()
        pure_filename = os.path.join(self.host.settings.work_dir, project_name,
                                     "%s.pure" % project_name)
    Aln_basic.write_widget_into_file(self.pure.text_widget, pure_filename, ask_filename)
def parse(self):
    """Run ``remove_seq_limits.py`` on the input alignment and fill the parse tab.

    The alignment input widget is written to ``<temp_name>.aln`` in the
    working directory and the script is run on it through the shell. From
    its output, the ``.fixed``, ``.pure``, ``.ids`` and (if present)
    ``.blocks_regions`` files are loaded into the corresponding parse tab
    widgets and then deleted; the remaining temporary files are deleted on a
    best-effort basis. Finally ``self.parse_tab.check_numbers()`` is called.
    """
    work_dir = self.settings.work_dir
    temp_name = self.temp_name
    aln_basename = "%s.aln" % temp_name

    Aln_basic.write_widget_into_file(
        self.input_tab.aln_input_frame.text_widget,
        os.path.join(work_dir, aln_basename))
    remove_seq_limits_path = os.path.join(self.settings.script_dir, "remove_seq_limits.py")

    self.set_status("Working")
    command = "%s -i %s -w %s -o %s -d -x" % (
        remove_seq_limits_path, aln_basename, work_dir, temp_name)
    if not self.verbose.get():
        # os.devnull is "nul" on Windows and "/dev/null" on POSIX
        command = "%s 1> %s 2> %s" % (command, os.devnull, os.devnull)
    os.system(command)
    self.set_status("Ready")

    # Temporary files that are not shown in the GUI. Each one is removed
    # independently so that a missing file does not keep the others alive.
    for extension in ("aln", "blocks", "motif", "motif_var", "orgs", "pure.correspond"):
        try:
            os.remove(os.path.join(work_dir, "%s.%s" % (temp_name, extension)))
        except OSError:
            pass  # best effort: the file may not have been produced

    fixed_filename = os.path.join(work_dir, "%s.fixed" % temp_name)
    Aln_basic.read_widget_from_file(self.parse_tab.fixed.text_widget, fixed_filename)
    os.remove(fixed_filename)

    pure_filename = os.path.join(work_dir, "%s.pure" % temp_name)
    Aln_basic.read_widget_from_file(self.parse_tab.pure.text_widget, pure_filename)
    self.parse_tab.enable_pure_analysis()
    os.remove(pure_filename)

    # The blocks file is optional, unlike the other result files
    blocks_filename = os.path.join(work_dir, "%s.blocks_regions" % temp_name)
    if os.path.isfile(blocks_filename):
        Aln_basic.read_widget_from_file(self.parse_tab.blocks.text_widget, blocks_filename)
        os.remove(blocks_filename)

    ids_filename = os.path.join(work_dir, "%s.ids" % temp_name)
    Aln_basic.read_widget_from_file(self.parse_tab.IDs.text_widget, ids_filename)
    os.remove(ids_filename)

    self.parse_tab.check_numbers()
def save_logs(self, ask_if_exists = True):
    """Store both log widgets in the project directory.

    The automatic log goes to ``<project>.auto_log`` and the manual log to
    ``<project>.man_log``. ``ask_if_exists`` is forwarded as the third
    argument of ``Aln_basic.write_widget_into_file``; the fourth argument is
    always ``True``.
    """
    # Automatic log
    auto_dir = os.path.join(self.host.settings.work_dir, self.host.get_project_name())
    auto_path = os.path.join(auto_dir, "%s.auto_log" % self.host.get_project_name())
    Aln_basic.write_widget_into_file(
        self.auto_log.text_widget, auto_path, ask_if_exists, True)

    # Manual log
    man_dir = os.path.join(self.host.settings.work_dir, self.host.get_project_name())
    man_path = os.path.join(man_dir, "%s.man_log" % self.host.get_project_name())
    Aln_basic.write_widget_into_file(
        self.manual_log.text_widget, man_path, ask_if_exists, True)