Esempio n. 1
0
    def load_featured_sequences(self):
        """
        Map the data from the 'Features' tab onto the current alignment.

        If the 'Features' tab is empty, a status message and an info dialog
        are shown and the method returns. Otherwise the alignment and the
        features are written into temporary files in the working directory and
        given to <udav_base.get_featured>; the result is stored in
        <self.featured_sequences>.
        """
        host = self.host
        if host.features_tab.features.text_is_empty():  # No features obtained
            host.set_status(
                "Cannot show domains; check that features in the 'Features' tab are obtained",
                "#888800")
            tkMessageBox.showinfo(
                "Obtain features first",
                "Before purification analysis please obtain features in the 'Features' tab!"
            )
            return

        import udav_base  # imported only when the features are really needed
        host.set_status(
            "Obtaining sequence features, check the console for progress",
            "#FF0000")

        work_dir = host.settings.work_dir
        alignment_path = os.path.join(work_dir, "%s.aln" % host.temp_name)
        Aln_basic.write_widget_into_file(
            host.input_tab.aln_input_frame.text_widget, alignment_path, False)

        features_path = os.path.join(work_dir, "%s.features" % host.temp_name)
        Aln_basic.write_widget_into_file(
            host.features_tab.features.text_widget, features_path, False)

        self.featured_sequences = udav_base.get_featured(
            alignment_path, None, features_path, {}, {}, False)
        del udav_base

        host.set_status("Ready")
        print("    [..DONE..]")
Esempio n. 2
0
    def create_UI(self):
        """
        Build the widgets of this tab: a horizontal paned window with four
        text panels ('Fixed' alignment, 'Pure names', blocks regions and list
        of IDs). Each panel has a 'Save to file' button; some panels get
        additional controls in their header.
        """
        pad = {"padx": self.p, "pady": self.p}  # common padding of the header controls

        self.grid_rowconfigure(0, weight=1)
        self.grid_columnconfigure(0, weight=1)
        central_panel = tkinter.PanedWindow(self, orient=tkinter.HORIZONTAL,
                                            sashwidth=self.p * 2,
                                            sashrelief=tkinter.RIDGE,
                                            background=self.host.back)
        central_panel.grid(row=0, column=0, sticky="NSEW")

        # ---- 'Fixed' alignment panel
        self.fixed = Aln_basic.TextFrameWithLabelAndButton(
            central_panel, self.p, None, None, "'Fixed' alignment:", "Save to file")
        self.fixed.button.configure(command=self.save_fixed)
        tree_button = tkinter.Button(self.fixed.panel, background=self.host.header,
                                     foreground="#FFFFFF", text="Sort by tree",
                                     command=self.sort_by_tree)
        tree_button.grid(row=0, column=2, sticky="NSW", **pad)
        central_panel.add(self.fixed)

        # ---- 'Pure names' panel
        self.pure = Aln_basic.TextFrameWithLabelAndButton(
            central_panel, self.p, None, None, "'Pure names':", "Save to file")
        self.pure.button.configure(command=self.save_pure)

        # Analysis buttons are created disabled; (attribute, caption, callback)
        analysis_buttons = (
            ("run_hmmsearch_Pfam", "HMM search (Pfam)", self.hmmsearch_Pfam),
            ("run_hmmsearch_COG", "HMM search (COG)", self.hmmsearch_COG),
            ("run_TMHMM", "TMHMM", self.TMHMM),
        )
        for column, (attr_name, caption, callback) in enumerate(analysis_buttons, start=2):
            button = tkinter.Button(self.pure.panel, state=tkinter.DISABLED,
                                    text=caption, command=callback)
            button.grid(row=0, column=column, sticky="NSW", **pad)
            setattr(self, attr_name, button)

        # Histogram parameters; (attribute, default value)
        tkinter.Label(self.pure.panel, text="Enter begin, step and nsteps:").grid(
            row=1, column=0, sticky="NSW")
        histogram_entries = (("begin_entry", 200), ("step_entry", 50), ("nsteps_entry", 10))
        for column, (attr_name, default_value) in enumerate(histogram_entries, start=1):
            entry = tkinter.Entry(self.pure.panel, width=8)
            entry.grid(row=1, column=column, sticky="NSW", **pad)
            entry.insert(tkinter.END, default_value)
            setattr(self, attr_name, entry)
        self.length_histo = tkinter.Button(self.pure.panel, text="To log",
                                           state=tkinter.DISABLED,
                                           command=self.histo_to_log)
        self.length_histo.grid(row=1, column=4, sticky="NSW", **pad)
        central_panel.add(self.pure)

        # ---- Blocks regions panel
        self.blocks = Aln_basic.TextFrameWithLabelAndButton(
            central_panel, self.p, None, None, "Blocks regions:", "Save to file")
        self.blocks.button.configure(command=self.save_blocks)
        central_panel.add(self.blocks)

        # ---- List of IDs panel
        self.IDs = Aln_basic.TextFrameWithLabelAndButton(
            central_panel, self.p, None, None, "List of IDs:", "Save to file")
        self.IDs.button.configure(command=self.save_IDs)
        self.ID_save_mode = tkinter.StringVar()
        self.ID_save_mode.set("No change")
        mode_menu = tkinter.OptionMenu(self.IDs.panel, self.ID_save_mode,
                                       "No change", "  -> gi  ", "fix gi-gi", "fix & ->")
        mode_menu.grid(row=0, column=2, sticky="NW", **pad)
        try:  # FIX: version 1.0.0 (if no assignment between gi and id/locus was given, conversion options are disabled)
            self.host.settings.table_filename  # only probed for existence
        except AttributeError:
            mode_menu.configure(state=tkinter.DISABLED)

        central_panel.add(self.IDs)
        self.update_idletasks()
        # Initial positions of the three sashes: (sash index, x coordinate)
        for sash_index, x_position in ((0, 300), (1, 1100), (2, 1500)):
            central_panel.sash_place(sash_index, x_position, 1)
Esempio n. 3
0
 def save_blocks(self, ask_filename = True):
     """
     Save the content of the blocks panel and export it in the MEGA format.

     Two files sharing the same prefix are written: <prefix>.blocks_regions
     and <prefix>_blocks.meg. If <ask_filename> is True the prefix is
     requested from the user with a 'save as' dialog; otherwise it is
     <work_dir>/<project name>/<project name>. Nothing is written if the
     dialog was cancelled.
     """
     if ask_filename:
         name_prefix = tkFileDialog.asksaveasfilename(filetypes = (("Prefix for blocks and MEGA files", "*.*"),))
         if not name_prefix:
             # FIX: cancelled dialog gives an empty answer; without this check
             # files named '.blocks_regions' and '_blocks.meg' were written
             # into the current directory
             return
     else:
         project_name = self.host.get_project_name()
         name_prefix = os.path.join(self.host.settings.work_dir, project_name, project_name)
     blocks_filename = name_prefix + ".blocks_regions"
     mega_filename = name_prefix + "_blocks.meg"
     Aln_basic.write_widget_into_file(self.blocks.text_widget, blocks_filename, ask_filename)
     Aln_basic.export_MEGA_format(self.blocks.text_widget, mega_filename, os.path.basename(blocks_filename))
Esempio n. 4
0
 def save_fixed(self, ask_filename = True):
     """
     Save the 'fixed' alignment and export it in the MEGA format.

     Two files sharing the same prefix are written: <prefix>.fixed and
     <prefix>_fixed.meg. If <ask_filename> is True the prefix is requested
     from the user with a 'save as' dialog; otherwise it is
     <work_dir>/<project name>/<project name>. Nothing is written if the
     dialog was cancelled.
     """
     if ask_filename:
         name_prefix = tkFileDialog.asksaveasfilename(filetypes = (("Prefix for fixed (fasta) and MEGA", "*.*"), ))
         if not name_prefix:
             # FIX: cancelled dialog gives an empty answer; without this check
             # files named '.fixed' and '_fixed.meg' were written into the
             # current directory
             return
     else:
         project_name = self.host.get_project_name()
         name_prefix = os.path.join(self.host.settings.work_dir, project_name, project_name)
     fixed_filename = name_prefix + ".fixed"
     mega_filename = name_prefix + "_fixed.meg"
     Aln_basic.write_widget_into_file(self.fixed.text_widget, fixed_filename, ask_filename)
     Aln_basic.export_MEGA_format(self.fixed.text_widget, mega_filename, os.path.basename(fixed_filename))
Esempio n. 5
0
 def save_TMHMM(self, ask_filename=True):
     """
     Write the content of the TMHMM results widget into a file.

     The default target is <work_dir>/<project name>/<project name>.TMHMM;
     if <ask_filename> is True the name is requested from the user with a
     'save as' dialog instead.
     """
     project = self.host.get_project_name()
     target = os.path.join(self.host.settings.work_dir, project,
                           "%s.TMHMM" % project)
     if ask_filename:
         dialog_types = (("Plain output of the TMHMM", "*.TMHMM"),
                         ("All", "*.*"))
         target = tkFileDialog.asksaveasfilename(filetypes=dialog_types)
     Aln_basic.write_widget_into_file(
         self.TMHMM_results.text_widget, target, ask_filename)
Esempio n. 6
0
 def save_features(self, ask_filename=True):
     """
     Write the content of the features widget into a file.

     The default target is <work_dir>/<project name>/<project name>.features;
     if <ask_filename> is True the name is requested from the user with a
     'save as' dialog instead.
     """
     project = self.host.get_project_name()
     target = os.path.join(self.host.settings.work_dir, project,
                           "%s.features" % project)
     if ask_filename:
         dialog_types = (("Sequence features", "*.features"),
                         ("All", "*.*"))
         target = tkFileDialog.asksaveasfilename(filetypes=dialog_types)
     Aln_basic.write_widget_into_file(
         self.features.text_widget, target, ask_filename)
Esempio n. 7
0
    def obtain_features(self):
        """
        Run the 'obtain_features.py' script and load its results.

        The hmmscan results of the current mode (COG or Pfam), the 'fixed'
        alignment and (if not empty) the TMHMM results are written into
        temporary files in the working directory. The script is then started
        through the shell; its output is shown only in the verbose mode.
        Afterwards the <temp_name>.features file is read into the features
        widget and, if the <temp_name>.domain_info file exists, the domain
        information is passed to <self.host.load_domain_info>.
        """
        print("    Obtaining features...")
        work_dir = self.host.settings.work_dir
        temp_name = self.host.temp_name

        domain_filename = os.path.join(work_dir, "%s.domain_table" % temp_name)
        if self.feature_mode.get() == "COG":
            hmm_widget = self.hmmresults_COG.text_widget
        else:
            hmm_widget = self.hmmresults_Pfam.text_widget
        # The result is used below to decide whether the '-p' option is given
        domain_not_empty = Aln_basic.write_widget_into_file(
            hmm_widget, domain_filename)

        fixed_filename = os.path.join(work_dir, "%s.fixed" % temp_name)
        dom_filename = "%s.domain_info" % temp_name
        features_basename = "%s.features" % temp_name
        Aln_basic.write_widget_into_file(self.host.parse_tab.fixed.text_widget,
                                         fixed_filename)

        obtain_features_path = os.path.join(self.host.settings.script_dir,
                                            "obtain_features.py")

        data_for_features = ""
        if domain_not_empty:
            data_for_features += "-p %s" % os.path.basename(domain_filename)
        if not self.TMHMM_results.text_is_empty():
            TM_filename = os.path.join(work_dir, "%s.TMHMM" % temp_name)
            Aln_basic.write_widget_into_file(self.TMHMM_results.text_widget,
                                             TM_filename)
            data_for_features += " -t %s" % os.path.basename(TM_filename)

        self.host.set_status("Working")
        command = "%s -i %s -w %s -o %s %s -e %s -f %s -d %s" % (
            obtain_features_path, os.path.basename(fixed_filename), work_dir,
            features_basename, data_for_features,
            self.evalue_threshold.get(), self.overlap_threshold.get(),
            dom_filename)
        if not self.host.verbose.get():
            # FIX: 'nul' exists only under Windows (elsewhere a file named
            # 'nul' was created); os.devnull is correct on every platform
            command += " 1> %s 2> %s" % (os.devnull, os.devnull)
        os.system(command)
        self.host.set_status("Ready")

        Aln_basic.read_widget_from_file(
            self.features.text_widget,
            os.path.join(work_dir, features_basename))

        dom_filename_full = os.path.join(work_dir, dom_filename)
        if os.path.isfile(dom_filename_full):  # File was created (non-empty domains)
            domain_dict = Aln_basic.read_domain_info_file(dom_filename_full)
            self.host.load_domain_info(domain_dict)  # Loading domain info into the info tab
        print("    [..DONE..]")
Esempio n. 8
0
    def save_IDs(self, ask_filename = True):
        """
        Save the list of protein IDs, optionally converting them first.

        The default target is <work_dir>/<project name>/<project name>.ids; if
        <ask_filename> is True the name is requested from the user (nothing is
        done if the dialog was cancelled). Depending on the selected save mode
        the IDs are 'fixed' (only the part before the first '-' is kept)
        and/or replaced with the values from the table read by
        <udav_soft.read_protein_table_info>. Converted IDs are placed into the
        widget only for the time of writing; the original text is restored
        afterwards.
        """
        ids_filename = os.path.join(self.host.settings.work_dir, self.host.get_project_name(), "%s.ids" % self.host.get_project_name())
        if ask_filename:
            ids_filename = tkFileDialog.asksaveasfilename(filetypes = (("List of protein IDs", "*.ids"), ("All", "*.*")))
            if not ids_filename: # FIX: dialog was cancelled, there is nothing to write
                return

        curr_ID_save_mode = self.ID_save_mode.get()
        curr_text = self.IDs.text_widget.get(1.0, tkinter.END).strip()
        if curr_ID_save_mode != "No change":
            print ("    IDs will be converted before writing!")
            to_gi = curr_ID_save_mode in ("  -> gi  ", "fix & ->")
            fix = curr_ID_save_mode in ("fix gi-gi", "fix & ->")

            locus_to_gi = dict()
            id_to_gi = dict()
            if to_gi:
                self.host.set_status("Working")
                import udav_soft
                try: #FIX: (version 1.0) if <settings.table_filename> is not given, nothing bad will happen
                    (locus_to_gi, id_to_gi, _, _) = udav_soft.read_protein_table_info(self.host.settings.table_filename, True)
                except AttributeError:
                    print ("    [ERROR]: Table with assignment of GI to other types of IDs was not given to the script!")
                    print ("             Please check 'table_filename' option in the <settings.ini> file")
                except OSError:
                    print ("    [ERROR]: Table with assignment of GI to other types of IDs '%s' not found!" % self.host.settings.table_filename)
                del udav_soft
                self.host.set_status("Ready")

            converted = list()
            for line in curr_text.split("\n"):
                curr_id = line.strip()
                if fix:
                    curr_id = curr_id.split("-", 1)[0]
                new_id = curr_id
                if to_gi:
                    # An assignment by ID has priority over the one by locus
                    new_id = locus_to_gi.get(curr_id, new_id)
                    new_id = id_to_gi.get(curr_id, new_id)
                converted.append(new_id)
            self.IDs.text_widget.delete(1.0, tkinter.END)
            self.IDs.text_widget.insert(tkinter.END, "\n".join(converted).strip())

        try:
            Aln_basic.write_widget_into_file(self.IDs.text_widget, ids_filename, ask_filename)
        finally:
            # Original IDs are returned to the widget even if writing failed
            self.IDs.text_widget.delete(1.0, tkinter.END)
            self.IDs.text_widget.insert(tkinter.END, curr_text)
Esempio n. 9
0
 def palette_is_saved(self, colors, data_type, names):
     """
     Callback for the <ColorFrame.ColorWind> window: compares the color/name
     set shown in that window with the current one from <self.domain_colors>
     (each element is read as (name, color)). The result is built from two
     <Aln_basic.compare_sets> calls, one for colors and one for names.
     Argument <data_type> is not used by this method.
     """
     pairs = list(self.domain_colors)
     own_names = [entry[0] for entry in pairs]
     own_colors = [entry[1] for entry in pairs]
     colors_match = Aln_basic.compare_sets(colors, own_colors)
     names_match = Aln_basic.compare_sets(names, own_names)
     return colors_match and names_match
Esempio n. 10
0
    def align(self):
        """
        Align the sequences from the unaligned sequences panel with Muscle.

        Sequences are written into a temporary file, Muscle is started
        through the shell with the options taken from the 'maxiters',
        'gapopen' and 'gapextend' entries (invalid values are ignored), and
        the resulting alignment is loaded into the alignment panel. If the
        'Insert BLOCKS & SITE' option is set, two gap-only sequences (BLOCKS
        and SITE) are placed before the alignment. Temporary files are
        removed at the end.
        """
        if self.seq_input_frame.text_is_empty(): # No sequences were provided
            self.host.set_status("No sequences were provided to align!", "#FF0000")
            return
        print ("    Alignment construction started...")
        (muscle_name, muscle_path) = Settings.get_program_name(self.host.settings.muscle_dir, "muscle")
        unaligned_filename = os.path.join(self.host.settings.work_dir, "%s.fasta" % self.host.temp_name)
        Aln_basic.write_widget_into_file(self.seq_input_frame.text_widget, unaligned_filename)
        aligned_filename = os.path.join(self.host.settings.work_dir, "%s.aln" % self.host.temp_name)

        self.host.set_status("Alignment")
        maxiters_option = ""
        if self.maxiters.get() != "":
            try:
                maxiters_option = "-maxiters %i" % int(self.maxiters.get())
            except (TypeError, ValueError):
                # FIX: int() raises ValueError for a non-numeric string, so
                # catching TypeError alone never ignored a bad value
                print ("Option -maxiters is not an integer and is ignored!")
        gapopen_option = ""
        if self.gapopen.get() != "":
            if Aln_basic.is_negative_float(self.gapopen.get(), "-gapopen"):
                gapopen_option = "-gapopen %s" % self.gapopen.get()
        gapextend_option = ""
        if self.gapextend.get() != "":
            if Aln_basic.is_negative_float(self.gapextend.get(), "-gapextend"):
                gapextend_option = "-gapextend %s" % self.gapextend.get()
        muscle_command = "%s -in %s -out %s %s %s %s" % (muscle_path, unaligned_filename, aligned_filename, maxiters_option, gapopen_option, gapextend_option)
        print ("Muscle command to be ran:")
        print (muscle_command)
        if self.host.verbose.get():
            os.system(muscle_command)
        else:
            # FIX: 'nul' exists only under Windows; os.devnull is correct on every platform
            os.system("%s 1> %s 2> %s" % (muscle_command, os.devnull, os.devnull))

        Aln_basic.read_widget_from_file(self.aln_input_frame.text_widget, aligned_filename)
        if self.insert_blocks.get(): # Empty sequence >BLOCKS should be added
            curr_seqs = Aln_basic.read_fasta_from_strings(self.aln_input_frame.get_strings())
            self.aln_input_frame.text_widget.delete(1.0, tkinter.END)
            gap_line = "-" * len(curr_seqs[0].sequence)
            # File is closed even if writing of some sequence fails
            with open(aligned_filename, "w") as upd_aln_file:
                upd_aln_file.write(">BLOCKS\n")
                upd_aln_file.write("%s\n\n" % gap_line)
                upd_aln_file.write(">SITE\n")
                upd_aln_file.write("%s\n\n" % gap_line)
                for s in curr_seqs:
                    s.print_fasta(upd_aln_file, 60)
            Aln_basic.read_widget_from_file(self.aln_input_frame.text_widget, aligned_filename)

        self.host.set_status("Ready")

        os.remove(unaligned_filename)
        os.remove(aligned_filename)
        print ("    [..DONE..]")
Esempio n. 11
0
    def hmmsearch_profile_database(self, profile_database, database_type):
        """
        Start hmmscan of the 'pure' sequences against a database of profiles.

        Arguments:
        <profile_database> - path to the database, given to hmmscan as is;
        <database_type>    - string used in the names of the result files
                             (<project name>.<database_type>_out and
                             <project name>.<database_type>_table).

        The project directory is created (after a confirmation) if it does
        not exist. The hmmscan process is started but not waited for: names
        of the result files are added to <self.host.pending_filenames> and
        <self.host.enable_check_button> is called.
        """
        project_name = self.host.get_project_name()
        curr_project_dir = os.path.join(self.host.settings.work_dir, project_name)
        if not os.path.isdir(curr_project_dir):
            dir_answer = tkMessageBox.askyesno("Create directory and start analysis?", "This type of analysis cannot be done if the project directory is not created. Do you want to create the following directory and continue?\n%s" % curr_project_dir)
            if not dir_answer:
                return
            print ("    Creating project directory de novo: '%s'" % curr_project_dir)
            os.mkdir(curr_project_dir)

        (hmmscan_name, hmmscan_path) = Settings.get_program_name(self.host.settings.hmmer_dir, "hmmscan")
        pure_filename = os.path.join(curr_project_dir, "%s.pure" % project_name)
        Aln_basic.write_widget_into_file(self.pure.text_widget, pure_filename)
        result_filename = os.path.join(curr_project_dir, "%s.%s_out" % (project_name, database_type))
        table_filename = os.path.join(curr_project_dir, "%s.%s_table" % (project_name, database_type))
        args = [hmmscan_path, "--domtblout", table_filename, "-o", result_filename, profile_database, pure_filename]
        # FIX: pipes were created but never read, so the process could hang
        # as soon as a pipe buffer was filled; the streams are discarded now
        subprocess.Popen(args, stderr = subprocess.DEVNULL, stdout = subprocess.DEVNULL)
        self.host.pending_filenames.append(result_filename)
        self.host.pending_filenames.append(table_filename)
        self.host.enable_check_button()
Esempio n. 12
0
    def apply_actions(self, ids_to_remove, ids_to_fix):
        """
        Remove the given sequences from the unaligned sequences panel.

        Arguments:
        <ids_to_remove> - dictionary: sequence ID -> sequence of two elements,
                          (reason of the removal, flag telling whether some
                          protein of the organism remains in the sample);
        <ids_to_fix>    - not used by this method.

        If the unaligned sequences panel is empty, sequences are taken from
        the alignment panel (a warning is shown). Sequences which are not
        removed are written into the unaligned sequences panel; a summary of
        the removal is written to the log.
        """
        print ("    Removement started...")
        if self.seq_input_frame.text_is_empty():
            tkMessageBox.showwarning("Unaligned sequences are not provided", "Alnalyser cannot find unaligned sequences. They will be loaded from the alignment panel and unaligned. Consider building new alignment after this step!")
            seqs = Aln_basic.read_fasta_from_strings(self.aln_input_frame.get_strings(), True)
        else:
            seqs = Aln_basic.read_fasta_from_strings(self.seq_input_frame.get_strings())
            self.seq_input_frame.text_widget.delete(1.0, tkinter.END)

        ids_by_reason = dict()   # reason -> list of removed IDs
        removed_count = 0
        organism_lost = list()   # (ID, organism) of removals leaving no protein of the organism
        for seq in seqs:
            if s_is_kept(seq, ids_to_remove) if False else seq.ID not in ids_to_remove:
                # Sequence survives and goes back to the panel
                self.seq_input_frame.text_widget.insert(tkinter.END, ">%s\n" % seq.name)
                self.seq_input_frame.text_widget.insert(tkinter.END, seq.sequence + "\n\n")
                continue
            removal_info = ids_to_remove[seq.ID]
            reason = removal_info[0]
            org_remains = removal_info[1]
            ids_by_reason.setdefault(reason, list()).append(seq.ID)
            removed_count += 1
            if not org_remains:
                organism_lost.append((seq.ID, seq.organism))

        total = len(seqs)
        log_text = "Removement log: %i sequences removed from %i (%i remained)\n" % (removed_count, total, total - removed_count)
        log_text += self.host.purify_tab.get_curr_options()
        for reason, removed_ids in ids_by_reason.items():
            log_text += "Reason(s) - %s:\n" % reason
            log_text += "".join("%s, " % protein_id for protein_id in removed_ids)
            log_text = "%s\n" % (log_text.strip(", "))
        log_text += "\n"
        log_text += "For %i removements no protein from this organism remained in the sample\n" % len(organism_lost)
        log_text += "".join("%s\t%s\n" % (lost_id, organism) for (lost_id, organism) in organism_lost)
        self.host.log_tab.write_to_log(log_text, True)

        print ("    [..DONE..] Total %i removes done (%i possible)" % (removed_count, len(ids_to_remove)))
Esempio n. 13
0
    def sort_by_tree(self):
        """
        Sort the 'fixed' alignment with the 'sort_and_color.py' script using
        a tree file selected by the user.

        The current 'fixed' alignment is written into a temporary file and
        the script is started through the shell (its output is shown only in
        the verbose mode). The <temp_name>.tree_sorted file is then loaded
        into the 'fixed' panel and the list of IDs is rebuilt in the new
        order. A message is shown if the number of sequences has changed.
        """
        tree_filename = tkFileDialog.askopenfilename(initialdir = self.host.settings.work_dir, filetypes = (("Inkscape vector file", "*.svg"), ))
        if not tree_filename: # Cancel; FIX: answer is not always "", it can be an empty tuple as well
            return
        old_order_seqs = Aln_basic.read_fasta_from_strings(self.fixed.get_strings())

        sort_and_color_path = os.path.join(self.host.settings.script_dir, "sort_and_color.py")
        fixed_filename = os.path.join(self.host.settings.work_dir, "%s.fixed" % self.host.temp_name)
        Aln_basic.write_widget_into_file(self.fixed.text_widget, fixed_filename)

        command = "%s -i %s -w %s -o %s -t %s" % (sort_and_color_path, os.path.basename(fixed_filename),
                                                  self.host.settings.work_dir, self.host.temp_name, tree_filename)
        if not self.host.verbose.get():
            # FIX: 'nul' exists only under Windows; os.devnull is correct on every platform
            command += " 1> %s 2> %s" % (os.devnull, os.devnull)
        os.system(command)

        temp_sorted_filename = os.path.join(self.host.settings.work_dir, "%s.tree_sorted" % self.host.temp_name)
        Aln_basic.read_widget_from_file(self.fixed.text_widget, temp_sorted_filename)

        self.IDs.text_widget.delete(1.0, tkinter.END)
        new_order_seqs = Aln_basic.read_fasta_from_strings(self.fixed.get_strings())
        new_ids = "".join("%s\n" % s.ID for s in new_order_seqs)
        self.IDs.text_widget.insert(tkinter.END, new_ids)

        print ("    Alignment was sorted according to the %s tree file!" % tree_filename)
        self.host.log_tab.write_to_log("Alignment was sorted according to the tree file:\n%s" % tree_filename, True)
        if len(new_order_seqs) != len(old_order_seqs):
            print ("    Number of sequences reduced from %i to %i! Possibly IDs in the tree file was interpreted badly" % (len(old_order_seqs), len(new_order_seqs)))
            self.host.set_status("Number of sequences reduced from %i to %i!" % (len(old_order_seqs), len(new_order_seqs)))
Esempio n. 14
0
    def create_UI(self):
        """
        Build the widgets of the log tab: a horizontal paned window with the
        automatically generated log and the manually generated log. The
        button of each panel is hidden and a 'Wrap text' checkbutton is
        placed into the panel header instead.
        """
        def add_wrap_checkbutton(log_frame, variable):
            # 'Wrap text' option in the header of the given log panel
            wrap_button = tkinter.Checkbutton(log_frame.panel, text="Wrap text",
                                              variable=variable,
                                              command=self.wrap_configure)
            wrap_button.grid(row=0, column=1, sticky="NSE", padx=self.p, pady=self.p)

        self.grid_rowconfigure(0, weight=1)
        self.grid_columnconfigure(0, weight=1)
        central_panel = tkinter.PanedWindow(self, orient=tkinter.HORIZONTAL,
                                            sashwidth=self.p * 2,
                                            sashrelief=tkinter.RIDGE,
                                            background=self.host.back)
        central_panel.grid(row=0, column=0, sticky="NSEW")

        # ---- Automatically generated log
        self.auto_log = Aln_basic.TextFrameWithLabelAndButton(
            central_panel, self.p, None, None, "Automatically generated log:", "")
        self.auto_log.button.grid_forget()
        self.auto_log.text_widget.tag_configure("important", background="#FFFF00")
        self.wrap_auto = tkinter.BooleanVar()
        add_wrap_checkbutton(self.auto_log, self.wrap_auto)
        central_panel.add(self.auto_log)

        # ---- Manually generated log
        self.manual_log = Aln_basic.TextFrameWithLabelAndButton(
            central_panel, self.p, None, None, "Manually generated log:", "")
        self.manual_log.button.grid_forget()
        self.wrap_manual = tkinter.BooleanVar()
        add_wrap_checkbutton(self.manual_log, self.wrap_manual)
        central_panel.add(self.manual_log)
Esempio n. 15
0
    def check_input(self):
        """
        Check the alignment from the alignment panel and report the result
        in the status bar (details are printed to the console).

        The alignment is written into a temporary file and read with
        <udav_base.read_alignment>. If a string is returned instead of the
        list of sequences, it is shown as the status message. Otherwise
        identical IDs and names which do not look like 'ID|smth|organism' or
        'ID|organism' (except for BLOCKS and SITE) are reported as warnings.
        """
        print ("    Checking input alignment...")
        if self.aln_input_frame.text_is_empty(): # No alignment is provided
            self.host.set_status("No alignment is provided!", "#FF0000")
        else:
            aligned_filename = os.path.join(self.host.settings.work_dir, "%s.aln" % self.host.temp_name)
            Aln_basic.write_widget_into_file(self.aln_input_frame.text_widget, aligned_filename)
            # FIX: raw strings are used, '\|' in a usual string is an invalid
            # escape sequence (patterns themselves are not changed)
            my_format_re = re.compile(r"^[^\|]+\|[^\|]+\|[^\|]+$")
            my_format_simple_re = re.compile(r"^[^\|]+\|[^\|]+$")
            import udav_base
            try:
                seq_list = udav_base.read_alignment(aligned_filename)
                if isinstance(seq_list, str): # This means that at least one sequence in alignment differs in length from other
                    self.host.set_status(seq_list, "#FF0000")
                else:
                    status_OK = True
                    id_to_name = dict()
                    for s in seq_list:
                        s.remove_limits(False, False)
                        if s.ID in id_to_name: # Identical protein IDs detected
                            status_OK = False
                            print ("    [..WARNING..] Identical protein ID detected: '%s'" % s.ID)
                        id_to_name[s.ID] = s.name

                        my_format = my_format_re.match(s.name)
                        my_format_simple = my_format_simple_re.match(s.name)
                        if not (my_format or my_format_simple) and s.name not in ("BLOCKS", "SITE"):
                            status_OK = False
                            print ("    [..WARNING..] This name can fail a purification step. Consider 'My' format instead")
                            print ("    Current name: '%s'" % s.name)
                            print ("    'My' format example: 'ID|smth|organism' or 'ID|organism'")
                    if status_OK:
                        self.host.set_status("OK")
                    else:
                        self.host.set_status("Alignment has problems with names format, check console for details", "#888800")
            except IndexError:
                self.host.set_status("Alignment is corrupted; check that it is in FASTA format!", "#888800")
            finally:
                # Temporary file is removed even if the check failed unexpectedly
                del udav_base
                os.remove(aligned_filename)
        print ("    [..DONE..]")
Esempio n. 16
0
    def check_settings(self):
        """
        Check the required options of <self.settings> with <Aln_basic.exists>
        and terminate the program if at least one check returned False.
        The following is checked:
        1) 'muscle_dir', 'hmmer_dir' and 'work_dir' with <os.path.isdir>;
        2) 'cog_profiles' and 'pfam_profiles' with <os.path.isfile>;
        3) programs 'muscle' (option 'muscle_dir'), 'hmmbuild', 'hmmpress' and
           'hmmscan' (option 'hmmer_dir') with <os.path.isfile>; under Windows
           the '.exe' extension is added to the program names.
        Option 'script_dir' is not checked by this method.
        """
        exe_suffix = ".exe" if platform.system() == "Windows" else ""

        # Arguments of <Aln_basic.exists> following the settings object
        required = (
            ("muscle_dir", os.path.isdir, "Muscle directory"),
            ("hmmer_dir", os.path.isdir, "HMMer directory"),
            ("work_dir", os.path.isdir, "Working directory"),
            ("cog_profiles", os.path.isfile, "Database of COG profiles"),
            ("pfam_profiles", os.path.isfile, "Database of Pfam profiles"),
            ("muscle_dir", os.path.isfile, "Muscle program", "muscle" + exe_suffix),
            ("hmmer_dir", os.path.isfile, "HMMbuild program", "hmmbuild" + exe_suffix),
            ("hmmer_dir", os.path.isfile, "HMMpress program", "hmmpress" + exe_suffix),
            ("hmmer_dir", os.path.isfile, "HMMscan program", "hmmscan" + exe_suffix),
        )
        # All checks are always done, so every problem is reported at once
        results = [Aln_basic.exists(self.settings, *check) for check in required]

        if False in results:
            sys.exit()
Esempio n. 17
0
    def create_UI(self):
        """
        Build the widgets of the input tab: a horizontal paned window with
        the alignment panel ('Check input' button) and the unaligned
        sequences panel ('Align now!' button). The header of the second panel
        also gets the 'Insert BLOCKS & SITE' option, entries for the
        'maxiters', 'gapopen' and 'gapextend' values and the
        'Filter identical' button.
        """
        pad = {"padx": self.p, "pady": self.p}  # common padding of the header controls

        self.grid_rowconfigure(0, weight=1)
        self.grid_columnconfigure(0, weight=1)
        central_panel = tkinter.PanedWindow(self, orient=tkinter.HORIZONTAL,
                                            sashwidth=self.p * 2,
                                            sashrelief=tkinter.RIDGE,
                                            background=self.host.back)
        central_panel.grid(row=0, column=0, sticky="NSEW")

        # ---- Alignment panel
        self.aln_input_frame = Aln_basic.TextFrameWithLabelAndButton(
            central_panel, self.p, self.host.header, "#FFFFFF",
            "Insert multiple alignment or generate it from the left panel:", "Check input")
        self.aln_input_frame.button.configure(command=self.check_input)
        central_panel.add(self.aln_input_frame)

        # ---- Unaligned sequences panel
        self.seq_input_frame = Aln_basic.TextFrameWithLabelAndButton(
            central_panel, self.p, self.host.header, "#FFFFFF",
            "Insert unaligned sequences:", "Align now!")
        self.seq_input_frame.button.configure(command=self.align)
        central_panel.add(self.seq_input_frame)
        header = self.seq_input_frame.panel

        self.insert_blocks = tkinter.BooleanVar()
        self.insert_blocks.set(False)
        blocks_option = tkinter.Checkbutton(header, text="Insert BLOCKS & SITE",
                                            variable=self.insert_blocks)
        blocks_option.grid(row=0, column=2, sticky="NSW", **pad)

        # Alignment options: (label which is also the attribute name, entry width,
        # initial text, column of the label); the entry is placed into the next column
        alignment_options = (
            ("maxiters", 2, "2", 3),
            ("gapopen", 3, "", 5),
            ("gapextend", 3, "", 7),
        )
        for option_name, entry_width, initial_text, label_column in alignment_options:
            tkinter.Label(header, text=option_name).grid(row=0, column=label_column, sticky="NSW")
            option_entry = tkinter.Entry(header, width=entry_width)
            option_entry.insert(tkinter.END, initial_text)
            option_entry.grid(row=0, column=label_column + 1, sticky="NSW", **pad)
            setattr(self, option_name, option_entry)

        filter_button = tkinter.Button(header, text="Filter identical", command=self.filter_seq)
        filter_button.grid(row=0, column=9, sticky="NSW", **pad)
Esempio n. 18
0
    def create_UI(self):
        """
        Build the widgets of this tab: a FASTA input frame with an input-format
        selector and a disabled "Format help" button.

        NOTE(review): this definition is cut off in the visible excerpt -- the
        final assignment to ``self.configure_frame`` has no right-hand side
        here, so whatever is created after it is not documented.
        """
        # Give row 0 / column 0 of this container all the resize weight.
        self.grid_rowconfigure(0, weight = 1)
        self.grid_columnconfigure(0, weight = 1)
        # Text frame for the input sequences; its button is wired to self.filter.
        self.input_frame = Aln_basic.TextFrameWithLabelAndButton(self, self.p, self.host.header, "#FFFFFF", 
                           "Insert sequences in FASTA format:", "Obtain taxonomy units")
        self.input_frame.grid(row = 0, column = 0, sticky = "NSEW", padx = self.p, pady = self.p)
        self.input_frame.button.configure(command = self.filter)
        # Input-format selector placed on the frame's panel; "My" is preselected.
        tkinter.Label(self.input_frame.panel, text = "Input format:").grid(row = 0, column = 2, sticky = "NSW")
        self.input_format = tkinter.StringVar()
        self.input_format.set("My")
        input_format = tkinter.OptionMenu(self.input_frame.panel, self.input_format, "URef", "NCBI", "Uniprot", "PDB", "COGalyser", "Basic", "COG", "OldRef", "My_Ref", "My", "Olesya", "German") 
        input_format.grid(row = 0, column = 3, sticky = "NSW", padx = self.p, pady = self.p)      
        # The help button is created in the DISABLED state.
        help_button = tkinter.Button(self.input_frame.panel, state = tkinter.DISABLED, text = "Format help", command = self.show_help)
        help_button.grid(row = 0, column = 4, sticky = "NSW", padx = self.p, pady = self.p)   

        # NOTE(review): statement truncated in this excerpt; right-hand side unknown.
        self.configure_frame = 
Esempio n. 19
0
 def histo_to_log(self):
     """
     Write a histogram of sequence lengths from the 'pure' frame to the log tab.

     The histogram origin, bin width and number of bins are read from the
     begin/step/nsteps entry widgets; if any of them is not an integer a
     warning is printed and nothing is written.
     """
     records = Aln_basic.read_fasta_from_strings(self.pure.get_strings())
     lengths = [len(record.sequence) for record in records]
     try:
         binning = (int(self.begin_entry.get()),
                    int(self.step_entry.get()),
                    int(self.nsteps_entry.get()))
     except ValueError:
         print ("    [..WARNING..] Enter proper begin, step and number of steps before printing!")
         return
     title = "Length of proteins in the alignment"
     self.host.log_tab.write_histogram(lengths, title, *binning)
Esempio n. 20
0
    def check_pending(self):
        """
        Poll the pending result files and load those that have been delivered.

        A pending file is treated as delivered when it exists and its ctime
        differs from its mtime; delivered files are dropped from
        ``self.pending_filenames`` (the check button is disabled once the list
        is empty). Delivered files named '<project>.<extension>' with a known
        extension are read into the matching widget of the features tab, and
        the outcome is reported through ``set_status``.
        """
        delivered = []
        still_waiting = []
        for path in self.pending_filenames:
            if os.path.isfile(path) and os.path.getctime(path) != os.path.getmtime(path):
                delivered.append(path)
            else:
                still_waiting.append(path)
        # Update the list in place so that other references to it stay valid.
        self.pending_filenames[:] = still_waiting
        if not self.pending_filenames:
            self.disable_check_button()

        # Extension -> name of the features-tab frame that displays the file.
        frame_name_by_extension = {
            "Pfam_table": "hmmresults_Pfam",
            "COG_table": "hmmresults_COG",
            "TMHMM": "TMHMM_results",
        }
        loaded_extensions = []
        for path in delivered:
            pieces = os.path.basename(path).split(".")
            if len(pieces) != 2:
                # Names without an extension or with several dots are ignored
                continue
            stem, extension = pieces
            if stem != self.get_project_name():
                continue
            frame_name = frame_name_by_extension.get(extension)
            if frame_name is None:
                continue
            frame = getattr(self.features_tab, frame_name)
            Aln_basic.read_widget_from_file(frame.text_widget, path)
            loaded_extensions.append(extension)

        if loaded_extensions:
            self.set_status("These files were ready and loaded: %s" %
                            ",".join(loaded_extensions))
        else:
            self.set_status("Sorry, no file delivered!")
Esempio n. 21
0
 def save_alignment(self):
     """Write the alignment input widget to '<work_dir>/<project>/<project>.aln'."""
     project = self.host.get_project_name()
     target_path = os.path.join(self.host.settings.work_dir, project, "%s.aln" % project)
     Aln_basic.write_widget_into_file(self.aln_input_frame.text_widget, target_path, False)
Esempio n. 22
0
    def save_logs(self, ask_if_exists = True):
        """
        Write both log widgets into the project folder.

        The automatic log goes to '<project>.auto_log' and the manual log to
        '<project>.man_log'; `ask_if_exists` is forwarded unchanged to
        Aln_basic.write_widget_into_file.
        """
        def dump(log_frame, suffix):
            # One log widget -> '<work_dir>/<project>/<project>.<suffix>'
            project = self.host.get_project_name()
            log_path = os.path.join(self.host.settings.work_dir, project, "%s.%s" % (project, suffix))
            Aln_basic.write_widget_into_file(log_frame.text_widget, log_path, ask_if_exists, True)

        dump(self.auto_log, "auto_log")
        dump(self.manual_log, "man_log")
Esempio n. 23
0
    def parse(self):
        """
        Run the external 'remove_seq_limits.py' script on the current alignment
        and load the files it produces into the widgets of the parse tab.

        The alignment widget is first dumped to '<temp_name>.aln' in the work
        directory. After the script finishes, auxiliary outputs that are not
        displayed are deleted, and the '.fixed', '.pure', optional
        '.blocks_regions' and '.ids' outputs are read into their widgets and
        then removed from disk.

        Raises:
            OSError: if an output file that is deleted after being loaded
                ('.fixed', '.pure', '.ids') cannot be removed, e.g. because
                the script did not create it.
        """
        work_dir = self.settings.work_dir
        temp_name = self.temp_name

        def temp_path(extension):
            # Path of '<temp_name>.<extension>' inside the work directory
            return os.path.join(work_dir, "%s.%s" % (temp_name, extension))

        aligned_filename = temp_path("aln")
        Aln_basic.write_widget_into_file(
            self.input_tab.aln_input_frame.text_widget, aligned_filename)
        remove_seq_limits_path = os.path.join(self.settings.script_dir,
                                              "remove_seq_limits.py")

        # NOTE(review): the command line is assembled without quoting, so paths
        # containing spaces will not survive the shell; left as in the original.
        command = "%s -i %s -w %s -o %s -d -x" % (
            remove_seq_limits_path, "%s.aln" % temp_name, work_dir, temp_name)

        self.set_status("Working")
        if not self.verbose.get():
            # os.devnull is 'nul' on Windows and '/dev/null' elsewhere, so the
            # output is really discarded instead of landing in a file named 'nul'.
            command += " 1> %s 2> %s" % (os.devnull, os.devnull)
        os.system(command)
        self.set_status("Ready")

        # Best-effort cleanup of outputs that are not shown anywhere. Each file
        # is handled on its own so that one missing file does not leave the
        # remaining ones behind.
        for extension in ("aln", "blocks", "motif", "motif_var", "orgs",
                          "pure.correspond"):
            try:
                os.remove(temp_path(extension))
            except OSError:
                pass  # deliberately ignored: the file may not have been produced

        fixed_filename = temp_path("fixed")
        Aln_basic.read_widget_from_file(self.parse_tab.fixed.text_widget,
                                        fixed_filename)
        os.remove(fixed_filename)

        pure_filename = temp_path("pure")
        Aln_basic.read_widget_from_file(self.parse_tab.pure.text_widget,
                                        pure_filename)
        self.parse_tab.enable_pure_analysis()
        os.remove(pure_filename)

        # The blocks file is optional and only loaded when it exists.
        blocks_filename = temp_path("blocks_regions")
        if os.path.isfile(blocks_filename):
            Aln_basic.read_widget_from_file(self.parse_tab.blocks.text_widget,
                                            blocks_filename)
            os.remove(blocks_filename)

        ids_filename = temp_path("ids")
        Aln_basic.read_widget_from_file(self.parse_tab.IDs.text_widget,
                                        ids_filename)
        os.remove(ids_filename)

        self.parse_tab.check_numbers()
Esempio n. 24
0
    def purify(self):
        """
        Prepare the purification tab from the fixed alignment of the parse tab.

        The alignment is cut to the column range returned by
        Aln_basic.get_valid_alignment_range for the presence threshold entered
        in the purification tab (50 is used when the entry is not an integer).
        The cut alignment is printed into the purification text widget, and the
        per-sequence data (IDs, organism names, full and cut sequences, shown
        range) is stored on the purification tab. Self-hit controls are reset
        and disabled. Does nothing but set a status when there is no alignment.
        """
        seqs = Aln_basic.read_fasta_from_strings(
            self.parse_tab.fixed.get_strings())
        if len(seqs) == 0:
            self.set_status("No alignment to purify!")
            return

        import udav_base  # local import, released again at the end of the method
        self.set_status("Working")
        # --------------------------------------- 1) Calculating cut for mainly gappy parts of alignment
        max_name_length = 50
        separator = "  :  "
        presence_threshold = 50
        try:
            presence_threshold = int(self.purify_tab.presence_entry.get())
        except ValueError:
            # Only a non-integer entry falls back to the default; any other
            # error is a real problem and must not be hidden.
            print("Using default presence threshold value = 50!")

        (valid_start, valid_end) = Aln_basic.get_valid_alignment_range(
            seqs, presence_threshold)
        self.purify_tab.alignment.add_label_data("showing a region [%i; %i]" %
                                                 (valid_start + 1, valid_end))
        self.purify_tab.alignment.text_widget.delete(1.0, tkinter.END)

        # --------------------------------------- 2) Printing alignment into the text widget tab
        self.set_status("Printing alignment", "#FF0000")
        # Names made of three or two '|'-separated fields carry the organism
        # in their last field; raw strings keep the '\|' escapes warning-free.
        three_field_name = re.compile(r"^[^\|]+\|[^\|]+\|[^\|]+$")
        two_field_name = re.compile(r"^[^\|]+\|[^\|]+$")

        seqs_cut = list()  # Sequences with the mainly gappy parts of alignment cut
        id_to_org_and_seq = dict()  # Protein id -> (organism name, full sequence)
        id_list = list()  # Protein ids in order of their occurrence
        for seq in seqs:
            # Printing to the widget: name cut/padded to a fixed width
            fit_name = seq.name[0:max_name_length].ljust(max_name_length)
            seq_part = seq.sequence[valid_start:valid_end]
            seqs_cut.append(udav_base.Sequence(seq.name, seq_part))
            self.purify_tab.alignment.text_widget.insert(
                tkinter.END, "%s%s%s\n" % (fit_name, separator, seq_part))
            # Saving data
            if seq.ID in id_to_org_and_seq:
                print(
                    "    [..WARNING..] Non-unique ID '%s' detected; purification may work unproperly!"
                    % seq.ID)

            # Default: whatever is left of the name once the ID is removed
            curr_org_name = seq.name.replace(seq.ID, "")
            if three_field_name.match(seq.name):
                curr_org_name = seq.name.split("|")[2]
            elif two_field_name.match(seq.name):
                curr_org_name = seq.name.split("|")[1]

            id_to_org_and_seq[seq.ID] = (curr_org_name, seq.sequence)
            id_list.append(seq.ID)

        self.purify_tab.id_to_org_and_seq = id_to_org_and_seq
        self.purify_tab.id_list = id_list
        self.purify_tab.seqs_cut = seqs_cut
        self.purify_tab.id_to_features = None
        self.purify_tab.featured_sequences = None
        self.purify_tab.valid_start = valid_start
        self.purify_tab.valid_end = valid_end
        self.purify_tab.name_length = max_name_length + len(separator)
        self.purify_tab.activate_buttons()
        #FIX: version 0.2.8 (self-hits data is set to default)
        self.purify_tab.evalue_threshold.delete(0, tkinter.END)
        self.purify_tab.evalue_threshold.configure(state=tkinter.DISABLED)
        self.purify_tab.sigma_num_self.delete(0, tkinter.END)
        self.purify_tab.sigma_num_self.configure(state=tkinter.DISABLED)

        del udav_base
        self.set_status("Ready")
Esempio n. 25
0
 def save_sequence_sample(self):
     """Write the unaligned-sequence widget to '<work_dir>/<project>/<project>.sample'."""
     project = self.host.get_project_name()
     target_path = os.path.join(self.host.settings.work_dir, project, "%s.sample" % project)
     Aln_basic.write_widget_into_file(self.seq_input_frame.text_widget, target_path, False)
Esempio n. 26
0
    def load_project(self):
        """
        Load a saved project from disk into the tabs of the application.

        The project folder is '<work_dir>/<project name>'; when no name was
        entered the user is asked to pick a folder. All current data is
        cleared (without confirmation), the project title is set to the folder
        name, and every file named '<project>.<extension>' with a known
        extension is loaded into its widget or handed to its loader.
        """
        entered_name = self.get_project_name()
        project_dir = os.path.join(self.settings.work_dir, entered_name)
        if entered_name == "":  # No project name was entered
            project_dir = tkFileDialog.askdirectory(
                initialdir=self.settings.work_dir,
                title="Please select a folder with the project:")
            if project_dir == "":  # Dialog cancelled
                return
        if not os.path.isdir(project_dir):
            self.set_status(
                "This project does not exist, please check its name again!",
                "#FF0000")
            return

        self.clear_all()
        self.project_title_widget.insert(tkinter.END,
                                         os.path.basename(project_dir))

        self.set_status("Working")
        print("-------- Project %s is now loading! --------" %
              self.get_project_name())

        input_tab = self.input_tab
        parse_tab = self.parse_tab
        features_tab = self.features_tab
        log_tab = self.log_tab
        # File extension -> text widget that receives the file content.
        # FIX: version 0.2.8 ('features' files should not be loaded)
        widget_for_extension = {
            "aln": input_tab.aln_input_frame.text_widget,
            "sample": input_tab.seq_input_frame.text_widget,
            "fixed": parse_tab.fixed.text_widget,
            "pure": parse_tab.pure.text_widget,
            "blocks_regions": parse_tab.blocks.text_widget,
            "ids": parse_tab.IDs.text_widget,
            "COG_table": features_tab.hmmresults_COG.text_widget,
            "Pfam_table": features_tab.hmmresults_Pfam.text_widget,
            "TMHMM": features_tab.TMHMM_results.text_widget,
            "auto_log": log_tab.auto_log.text_widget,
            "man_log": log_tab.manual_log.text_widget
        }

        for entry in os.listdir(project_dir):
            pieces = entry.split(".")
            if len(pieces) != 2:
                # Names without an extension or with several dots are ignored
                continue
            stem, extension = pieces
            entry_path = os.path.join(project_dir, entry)
            if stem != self.get_project_name():
                continue
            if extension in widget_for_extension:
                Aln_basic.read_widget_from_file(
                    widget_for_extension[extension], entry_path)
            if extension == "pure":
                self.parse_tab.enable_pure_analysis()
            elif extension == "actions":
                self.purify_tab.load_actions(entry_path)
            elif extension == "colors":
                self.load_colors_from_file(entry_path)

        self.parse_tab.check_numbers()
        self.log_tab.color_important()
        self.set_status("Ready")
Esempio n. 27
0
 def save_pure(self, ask_filename = True):
     """
     Write the 'pure' sequence widget to a file.

     With `ask_filename` the target is chosen in a save dialog; otherwise it
     is '<work_dir>/<project>/<project>.pure'. `ask_filename` is also passed
     on as the third argument of Aln_basic.write_widget_into_file.
     """
     project = self.host.get_project_name()
     default_path = os.path.join(self.host.settings.work_dir, project, "%s.pure" % project)
     dialog_types = (("Non-aligned sequences with pure names (fasta)", "*.pure"), ("All", "*.*"))
     target_path = tkFileDialog.asksaveasfilename(filetypes = dialog_types) if ask_filename else default_path
     Aln_basic.write_widget_into_file(self.pure.text_widget, target_path, ask_filename)
Esempio n. 28
0
    def create_UI(self):
        """
        Build the converter tab: an input frame and an output frame placed
        side by side in a horizontal paned window.

        The input frame gets an input-format selector and a disabled help
        button; the output frame (whose own button is hidden) gets the output
        format selector, an optional group label entry, the 'Replace spaces'
        and 'Unalign' check boxes and the ID-mode radio buttons.
        """
        pad = {"sticky": "NSW", "padx": self.p, "pady": self.p}

        self.grid_rowconfigure(0, weight=1)
        self.grid_columnconfigure(0, weight=1)
        central_panel = tkinter.PanedWindow(
            self, orient=tkinter.HORIZONTAL, sashwidth=self.p * 2,
            sashrelief=tkinter.RIDGE, background=self.host.back)
        central_panel.grid(row=0, column=0, sticky="NSEW")

        # ---- left pane: sequences to convert
        self.input_frame = Aln_basic.TextFrameWithLabelAndButton(
            central_panel, self.p, self.host.header, "#FFFFFF",
            "Insert FASTA-format sequences:", "=> Convert =>")
        self.input_frame.button.configure(command=self.convert)
        in_panel = self.input_frame.panel
        tkinter.Label(in_panel, text="Input format:").grid(
            row=0, column=2, sticky="NSW")
        self.input_format = tkinter.StringVar(value="Basic")
        input_choices = ("Basic", "NCBI", "NCBI_2016-", "Uniprot", "PDB",
                         "COGcollator", "URef", "My")
        input_format = tkinter.OptionMenu(in_panel, self.input_format,
                                          *input_choices)
        input_format.grid(row=0, column=3, **pad)
        help_button = tkinter.Button(in_panel, state=tkinter.DISABLED,
                                     text="Format help",
                                     command=self.show_help)
        help_button.grid(row=0, column=4, **pad)
        central_panel.add(self.input_frame)

        # ---- right pane: conversion result and output options
        self.output_frame = Aln_basic.TextFrameWithLabelAndButton(
            central_panel, self.p, self.host.header, "#FFFFFF",
            "Your result will be shown here:", "")
        self.output_frame.button.grid_forget()  # this frame needs no button
        out_panel = self.output_frame.panel
        tkinter.Label(out_panel, text="Output format:").grid(
            row=0, column=1, sticky="NSW")

        self.output_format = tkinter.StringVar(value="My")
        output_choices = ("My", "Basic", "NCBI", "ID", "Table",
                          "Same but fixed")
        output_format = tkinter.OptionMenu(out_panel, self.output_format,
                                           *output_choices)
        output_format.grid(row=0, column=2, **pad)
        tkinter.Label(out_panel, text="Group label (optional):").grid(
            row=0, column=3, sticky="NSW")
        self.output_class = tkinter.Entry(out_panel, width=15)
        self.output_class.grid(row=0, column=4, **pad)

        self.replace_spaces = tkinter.BooleanVar(value=True)
        c = tkinter.Checkbutton(out_panel, text="Replace spaces",
                                variable=self.replace_spaces)
        c.grid(row=0, column=5, **pad)

        # Radio buttons selecting which identifier is used: (caption, value)
        self.id_mode = tkinter.StringVar(value="GI")
        id_modes = (("GI", "GI"), ("protein_id", "ID"), ("locus", "locus"))
        for column, (caption, mode) in enumerate(id_modes, 6):
            radio = tkinter.Radiobutton(out_panel, text=caption,
                                        variable=self.id_mode, value=mode)
            radio.grid(row=0, column=column, **pad)

        self.unalign = tkinter.BooleanVar(value=False)
        c = tkinter.Checkbutton(out_panel, text="Unalign",
                                variable=self.unalign)
        c.grid(row=0, column=9, **pad)

        central_panel.add(self.output_frame)
Esempio n. 29
0
    def filter_seq(self):
        """
        Remove redundant records from the unaligned-sequence input frame.

        A record that repeats both the ID and the sequence of an earlier
        record is dropped. A record that repeats an ID with a different
        sequence is kept and reported as a problem. Records whose sequence
        equals that of an earlier record are collected by ID; if there are
        any, the user is asked whether every record with such an ID should be
        removed as well. The frame is then rewritten with the remaining
        records and a summary is written to the status bar and the log.
        """
        seqs = Aln_basic.read_fasta_from_strings(self.seq_input_frame.get_strings())
        seq_size = len(seqs)
        seq_ids_unique = dict()    # ID -> sequence first seen under this ID
        seqs_unique = set()        # Sequences already met among kept records
        identical_seq_ids = set()  # IDs of records repeating an earlier sequence
        bad_ids = list()           # IDs met again with a different sequence
        smooth = True
        r = 0                      # Number of removed records

        kept = list()
        for curr in seqs:
            if curr.ID not in seq_ids_unique:  # This is normal sequence
                seq_ids_unique[curr.ID] = curr.sequence
            elif curr.sequence != seq_ids_unique[curr.ID]:  # Sequences differ
                smooth = False
                bad_ids.append(curr.ID)
            else:
                # Exact duplicate: drop it and go straight to the next record,
                # so that no other record is examined in its place.
                r += 1
                continue
            if curr.sequence in seqs_unique:
                identical_seq_ids.add(curr.ID)
            else:
                seqs_unique.add(curr.sequence)
            kept.append(curr)
        seqs = kept

        if len(identical_seq_ids) != 0:
            answer = tkMessageBox.askyesno("Filter identical sequences?", "We found %i sequences which are identical with some other sequence in alignment. Do you want to remove them?" % len(identical_seq_ids), icon = "question", parent = self)
            if answer:
                remaining = [curr for curr in seqs if curr.ID not in identical_seq_ids]
                r += len(seqs) - len(remaining)
                seqs = remaining

        self.seq_input_frame.text_widget.delete(1.0, tkinter.END)
        for curr in seqs:
            self.seq_input_frame.text_widget.insert(tkinter.END, ">%s\n%s\n\n" % (curr.name, curr.sequence))

        curr_message = "Filtering of %i input sequences; %i sequences removed; %i remained\n" % (seq_size, r, len(seqs))
        if smooth:
            self.host.set_status("Filtering gained success; %i sequences removed!" % r, self.host.header)
            curr_message += "Filtering gained success, all non-unique IDs removed\n"
        else:
            self.host.set_status("Filtering NOT gained success; see console for details!")
            print ("These sequences have duplicate IDs but different sequences; NOT removed:")
            # The header ends with a newline so that the first ID starts its own line
            curr_message += "Filtering NOT gained success, NOT all non-unique IDs removed; these remains:\n"
            for bad in bad_ids:
                print (bad)
                curr_message += "%s\n" % bad
        self.host.log_tab.write_to_log(curr_message, True)
Esempio n. 30
0
    def create_UI(self):
        """
        Build the purification tab inside a horizontal paned window.

        The left pane holds the option controls (e-value and sigma entries,
        self-hit and 'Apply options' buttons, length limits) above a two-column
        tree view of actions with a vertical scrollbar. The right pane is the
        alignment text frame with the domain, taxonomy and 'Apply actions'
        buttons and the presence threshold entry. Finally the sash is placed
        at the requested width of the left pane.
        """
        pad = {"sticky": "NSW", "padx": self.p, "pady": self.p}
        off = tkinter.DISABLED

        self.grid_rowconfigure(0, weight=1)
        self.grid_columnconfigure(0, weight=1)
        central_panel = tkinter.PanedWindow(
            self, orient=tkinter.HORIZONTAL, sashwidth=self.p * 2,
            sashrelief=tkinter.RIDGE, background=self.host.back)
        central_panel.grid(row=0, column=0, sticky="NSEW")

        base_frame = tkinter.Frame(central_panel)
        base_frame.columnconfigure(0, weight=1)
        base_frame.rowconfigure(1, weight=1)

        # ---- left pane, top: purification options
        top_frame = tkinter.Frame(base_frame)
        tkinter.Label(top_frame, text="Max e-value:").grid(
            row=0, column=0, sticky="NSW")
        self.evalue_threshold = tkinter.Entry(top_frame, state=off, width=8)
        self.evalue_threshold.grid(row=0, column=1, **pad)
        tkinter.Label(top_frame, text="# of σ (hit):").grid(
            row=0, column=2, sticky="NSW")
        self.sigma_num_self = tkinter.Entry(top_frame, state=off, width=4)
        self.sigma_num_self.grid(row=0, column=3, **pad)
        self.self_hits_button = tkinter.Button(
            top_frame, state=off, text="Find self-hits",
            command=self.find_self_hits)
        self.self_hits_button.grid(row=0, column=4, **pad)
        self.white_crow_button = tkinter.Button(
            top_frame, state=off, text="Apply options",
            command=self.apply_purification_options)
        self.white_crow_button.grid(row=1, column=4, **pad)
        tkinter.Label(top_frame, text="Min length:").grid(
            row=1, column=0, sticky="NSW")
        self.min_length = tkinter.Entry(top_frame, width=6)
        self.min_length.insert(tkinter.END, "0")
        self.min_length.grid(row=1, column=1, **pad)
        tkinter.Label(top_frame, text="Max length:").grid(
            row=1, column=2, sticky="NSW")
        self.max_length = tkinter.Entry(top_frame, width=6)
        self.max_length.insert(tkinter.END, "10000")
        self.max_length.grid(row=1, column=3, **pad)

        top_frame.grid(row=0, column=0, columnspan=2, sticky="NSEW")

        # ---- left pane, bottom: list of actions with its scrollbar
        y_scrollbar = tkinter.Scrollbar(base_frame)
        y_scrollbar.grid(row=1, column=1, sticky="NS")
        actions = ttk.Treeview(base_frame,
                               columns=("action", "organism"),
                               selectmode="extended",
                               yscrollcommand=y_scrollbar.set)
        y_scrollbar.config(command=actions.yview)
        actions.grid(row=1, column=0, sticky="NSEW")
        for column_id, caption in (("action", "Action"),
                                   ("organism", "Organism")):
            actions.column(column_id, width=50, anchor="w")
            actions.heading(column_id, text=caption)
        actions.bind("<Double-Button-1>", self.get_click)
        self.actions = actions
        self.actions.menu_available = True  # To show <ActionMenu> only at this widget
        central_panel.add(base_frame)

        # ---- right pane: alignment with suggested changes
        self.alignment = Aln_basic.TextFrameWithLabelAndButton(
            central_panel, self.p, self.host.header, "#FFFFFF",
            "Suggested changes in alignment:", "Load domains")
        self.alignment.button.configure(state=off,
                                        command=self.create_domain_tags)
        self.alignment.text_widget.purify_text_widget = True  # To show <TextMenu> only at this widget
        tools = self.alignment.panel
        self.show_features = tkinter.Button(
            tools, state=off, text="Show domains", command=self.show_domains)
        self.show_features.grid(row=0, column=2, **pad)
        self.hide_features = tkinter.Button(
            tools, state=off, text="Hide domains", command=self.hide_domains)
        self.hide_features.grid(row=0, column=3, **pad)
        self.load_taxonomy = tkinter.Button(
            tools, state=off, text="Color taxonomy",
            command=self.color_taxonomy)
        self.load_taxonomy.grid(row=0, column=4, **pad)
        self.act = tkinter.Button(
            tools, state=off, text="Apply actions",
            background=self.host.header, foreground="#FFFFFF",
            command=self.apply_actions)
        # Triple horizontal padding for this button
        self.act.grid(row=0, column=5, sticky="NSW",
                      padx=self.p * 3, pady=self.p)
        tkinter.Label(tools, text="Presence required (%):").grid(
            row=0, column=6)
        self.presence_entry = tkinter.Entry(tools, width=3)
        self.presence_entry.insert(tkinter.END, "0")
        self.presence_entry.grid(row=0, column=7, padx=self.p, pady=self.p)

        central_panel.add(self.alignment)

        # Geometry must be up to date before the requested width is read
        self.update_idletasks()
        central_panel.sash_place(0, base_frame.winfo_reqwidth(), 1)