class Globby_Text_Editor(tk.Frame):
    """Wiki-markup editor frame: a text field beside two button columns.

    The left side holds a scrolled text widget with a label on top naming the
    opened file.  The right side holds the "Edit Buttons" (open, new, delete,
    save, undo, redo) and the "Syntax Buttons" which insert wiki markup at
    the cursor or around the current selection.
    """

    # Milliseconds between two checks for unsaved changes in the text field.
    CHANGE_POLL_MS = 200

    def __init__(self, parent_widget, settings):
        """Build all widgets inside parent_widget and start change tracking.

        Parameter:
        --> parent_widget: Tk widget this frame is packed into
        --> settings:      object providing `projects_path` and
                           `current_project`; both are joined into file paths
        """
        # TODO: these values are obsolete since Project_Settings covers them
        #       --> self.settings.projects_path
        self.hash_opened_filename = None
        self.opened_filename = None
        self.settings = settings
        # id of the pending `after` callback used for change tracking
        self._poll_id = None

        # NOTE(review): 'hotkey' of the edit buttons is never bound (only
        # buttons with 'open_tag'/'close_tag' get a key binding), and
        # 'del page' shows the same 'CTRL+n' label as 'new page' -- confirm
        # the intended shortcuts before binding them.
        self.edit_button_list = [
            {'text': 'new page', 'cmd': self.on_new_page,
             'keytxt': 'CTRL+n', 'hotkey': '<Control-n>'},
            {'text': 'del page', 'cmd': self.on_del_page,
             'keytxt': 'CTRL+n', 'hotkey': '<DELETE>'},
            {'text': 'save', 'cmd': self.on_save,
             'keytxt': 'CTRL+s', 'hotkey': '<Control-s>'},
            {'text': 'undo', 'cmd': self.on_undo,
             'keytxt': 'CTRL+z', 'hotkey': '<Control-z>'},
            {'text': 'redo', 'cmd': self.on_redo,
             'keytxt': 'CTRL+y', 'hotkey': '<Control-y>'},
        ]

        self.syntax_button_list = [
            {'text': '**bold**', 'cmd': self.on_tag_insert,
             'open_tag': '**', 'close_tag': '**',
             'keytxt': 'CTRL+b', 'hotkey': '<Control-b>'},
            {'text': '//italic//', 'cmd': self.on_tag_insert,
             'open_tag': '//', 'close_tag': '//',
             'keytxt': 'CTRL+i', 'hotkey': '<Control-i>'},
            {'text': '__underline__', 'cmd': self.on_tag_insert,
             'open_tag': '__', 'close_tag': '__',
             'keytxt': 'CTRL+u', 'hotkey': '<Control-u>'},
            {'text': '[Link]', 'cmd': self.on_tag_insert,
             'open_tag': '[', 'close_tag': ']',
             'keytxt': 'CTRL+l', 'hotkey': '<Control-l>'},
            {'text': '¸¸sub¸¸', 'cmd': self.on_tag_insert,
             'open_tag': '¸¸', 'close_tag': '¸¸',
             'keytxt': 'CTRL+d', 'hotkey': '<Control-d>'},
            {'text': '^^upper^^', 'cmd': self.on_tag_insert,
             'open_tag': '^^', 'close_tag': '^^',
             'keytxt': 'CTRL+q', 'hotkey': '<Control-q>'},
            {'text': '-~smaller~-', 'cmd': self.on_tag_insert,
             'open_tag': '-~', 'close_tag': '~-',
             'keytxt': 'CTRL+w', 'hotkey': '<Control-w>'},
            {'text': '+~bigger~+', 'cmd': self.on_tag_insert,
             'open_tag': '+~', 'close_tag': '~+',
             'keytxt': 'CTRL+e', 'hotkey': '<Control-e>'},
            {'text': '~~strike_thru~~', 'cmd': self.on_tag_insert,
             'open_tag': '~~', 'close_tag': '~~',
             'keytxt': 'CTRL+t', 'hotkey': '<Control-t>'},
        ]

        # build Widgets
        tk.Frame.__init__(self, parent_widget)
        self.pack(fill=tk.BOTH, expand=tk.YES)
        self.editor()
        self.button_frame()

        # Start tracking text changes inside the editfield.  This is done
        # with `after` on the Tk event loop instead of a worker thread,
        # because Tk widgets must only be used from the thread running the
        # main loop.
        self.on_txt_changes()

    def destroy(self):
        """Cancel the pending change-tracking callback, then destroy the frame."""
        if self._poll_id is not None:
            self.after_cancel(self._poll_id)
            self._poll_id = None
        tk.Frame.destroy(self)

    def editor(self):
        """Combine some widgets to an enhanced editor (incl. scrollbar).

        --> self.text
            the text widget itself
        --> self.opened_file_label
            label on top of the editfield showing the name of the currently
            opened file; its colour is used to show text changes
        """
        # build widgets
        self.txtfrm = tk.Frame(self)
        self.txtfrm.pack(fill=tk.BOTH, side=tk.LEFT, expand=tk.YES)
        self.opened_file_label = tk.Label(self.txtfrm, text="No File chosen")
        self.opened_file_label.pack(fill=tk.X)
        self.text = ScrolledText(self.txtfrm, bg="white",
                                 undo=1, maxundo=30, wrap=tk.WORD)
        self.text.pack(fill=tk.BOTH, expand=tk.YES, side=tk.LEFT)
        self.text.insert('1.0', u"Please open a File to edit")

        # reference hash for the comparison done in on_txt_changes
        self.hash_opened_filename = hash(self.text.get('1.0', tk.END))

        # set focus on the text widget and move the cursor to the upper left
        self.text.focus_set()
        self.text.mark_set(tk.INSERT, '1.0')    # goto line
        self.text.see(tk.INSERT)                # scroll to line

    def label_button_row(self, parent_widget=None, btnlst=None, start_count=0):
        """Build a 2 column table with a label beside each button in a row.

        The label displays the keyboard sequence of the button.  For markup
        buttons the keyboard sequence is also bound to the text widget.

        todo:
        - think about a parameter for the widget to bind the hotkeys
        - rename to: labled_button_row, draw_labled_button_row

        Parameter:
        --> parent_widget: parent widget to place the table
        --> btnlst: list of dicts, each representing a button.  Example:
                {'text':'**bold**',          # displayed on the button
                 'cmd':self.on_tag_insert,   # command
                 'open_tag':'**',            # chars opening the tag
                 'close_tag':'**',           # chars closing the tag
                 'keytxt':'CTRL+b',          # displayed on the label
                 'hotkey':'<Control-b>'}     # keyboard sequence
            Note: the existence of 'open_tag' and 'close_tag' decides which
            command is bound to the button.  If both are there, the button
            (and its hotkey) calls self.on_tag_insert(event, open_tag,
            close_tag).  Otherwise 'cmd' is used and must be callable without
            parameters; no hotkey is bound in that case.
        --> start_count: grid row of the first generated button.  Used to put
            the generated rows under an already existing row (here: under
            the menubuttons for file choosing and headlines).
        """
        for row, btn in enumerate(btnlst or (), start_count):
            if 'open_tag' in btn and 'close_tag' in btn:
                # Tk passes the event as first argument when the hotkey
                # fires; the button calls without arguments (event=None).
                command = (lambda event=None,
                           otag=btn['open_tag'], ctag=btn['close_tag']:
                           self.on_tag_insert(event, otag, ctag))
                hotkey = btn.get('hotkey')
                if hotkey:
                    self.text.bind(hotkey, command)
            else:
                command = btn['cmd']
            tk.Button(parent_widget, text=btn['text'], command=command,
                      relief=tk.RIDGE
                      ).grid(column=0, row=row, sticky=tk.W+tk.E)
            tk.Label(parent_widget, text=btn['keytxt'], relief=tk.FLAT
                     ).grid(column=1, row=row, sticky=tk.W)

    def _popup_menu(self, menu, event):
        """Post menu at the pointer position of event.

        Returns "break" so the default text-widget binding of the same key
        sequence does not run afterwards and modify the text.
        """
        menu.tk_popup(event.x_root, event.y_root)
        return 'break'

    def button_frame(self):
        """Draw a frame holding the edit- and syntax-buttons under each other."""
        self.btnfrm = tk.Frame(self)
        self.btnfrm.pack(fill=tk.BOTH, side=tk.LEFT)
        self.edit_buttons()
        self.syntax_buttons()

    def edit_buttons(self):
        """Draw a frame with buttons for editing (open, new, save, undo, ...)."""
        # generate a labelframe
        self.efrm = tk.LabelFrame(self.btnfrm, text="Edit Buttons")
        self.efrm.pack(fill=tk.BOTH, padx=5, pady=5)

        # menubutton with a pulldown menu to choose the file to edit; the
        # menu entries are regenerated every time the menu is posted
        self.file_open_mbtn = tk.Menubutton(self.efrm, text='Open File')
        self.file_open_menu = tk.Menu(self.file_open_mbtn,
                                      postcommand=self.gen_file2edit_menu)
        self.file_open_mbtn.config(menu=self.file_open_menu, relief=tk.RIDGE)
        self.file_open_mbtn.grid(column=0, row=0, sticky=tk.W+tk.E)

        # label beside the button to display the associated shortcut
        self.file_open_lbl = tk.Label(self.efrm, text='CTRL+o', relief=tk.FLAT)
        self.file_open_lbl.grid(column=1, row=0, sticky=tk.W+tk.E)

        # generate buttons as described in self.edit_button_list
        self.label_button_row(self.efrm, self.edit_button_list, 2)

        # bind keyboard shortcut to the menu
        self.text.bind(
            '<Control-o>',
            lambda event: self._popup_menu(self.file_open_menu, event))

    def gen_file2edit_menu(self):
        """(Re)generate the entries of the file chooser menu.

        Lists every *.txt file of the current project folder except
        "menue.txt"; choosing an entry opens that file via on_open.
        """
        # delete all existing menu entries
        self.file_open_menu.delete(0, tk.END)
        proj_path = os.path.join(self.settings.projects_path,
                                 self.settings.current_project)
        print("proj_path %s" % proj_path)

        # sorted: os.listdir returns the names in arbitrary order
        for this_file in sorted(os.listdir(proj_path)):
            name, extension = os.path.splitext(this_file)
            if extension != ".txt" or name == "menue":
                continue
            open_file = os.path.join(proj_path, this_file)
            # bind the path as default value: every entry opens its own file
            do_it = lambda path=open_file: self.on_open(path)
            # the label has to be a string (a tuple would be shown as a Tcl
            # list, e.g. "{my page} .txt")
            self.file_open_menu.add_command(label=this_file, command=do_it)

    def syntax_buttons(self):
        """Draw a frame with buttons for inserting (wiki)markup.

        idea: new parameter for on_tag_insert()
              jump_in_between=True/False so a pulldown list for the
              different levels of headlines isn't necessary
        """
        # generate a labelframe
        self.sfrm = tk.LabelFrame(self.btnfrm, text="Syntax Buttons")
        self.sfrm.pack(fill=tk.BOTH, padx=5, pady=5)

        # menubutton with a pulldown menu for the headline syntax
        self.headln_menubtn = tk.Menubutton(self.sfrm, text='= Headlines =')
        self.headln_menu = tk.Menu(self.headln_menubtn)
        self.headln_menubtn.config(menu=self.headln_menu, relief=tk.RIDGE)

        # one menu entry per headline level; level n is wrapped in n '='
        for level, entry in enumerate(('h1', 'h2', 'h3', 'h4', 'h5', 'h6'), 1):
            otag = '\n\n' + '=' * level + ' '
            ctag = ' ' + '=' * level + '\n\n'
            doit = (lambda event=None, o=otag, c=ctag:
                    self.on_tag_insert(event, o, c))
            self.headln_menu.add_command(label=entry, command=doit)
        self.headln_menubtn.grid(column=0, row=0, sticky=tk.W+tk.E)

        # label beside the button to display the associated shortcut
        self.headln_lbl = tk.Label(self.sfrm, text='CTRL+h', relief=tk.FLAT)
        self.headln_lbl.grid(column=1, row=0, sticky=tk.W+tk.E)

        # generate buttons as described in self.syntax_button_list
        self.label_button_row(self.sfrm, self.syntax_button_list, 1)

        # bind keyboard shortcut to the menu
        self.text.bind(
            '<Control-h>',
            lambda event: self._popup_menu(self.headln_menu, event))

    def on_txt_changes(self, dummy_value=tk.NONE):
        """Show text changes of the editfield by the colour of the file label.

        Compares the hash of the current text with the reference hash taken
        on open/save: red label = unsaved changes, black label = no changes.
        The method re-schedules itself on the Tk event loop every
        CHANGE_POLL_MS milliseconds, so it returns immediately.

        Parameter:
        --> dummy_value: unused, kept for compatibility with older callers
        """
        new_hash = hash(self.text.get('1.0', tk.END))
        colour = "red" if new_hash != self.hash_opened_filename else "black"
        self.opened_file_label.configure(fg=colour)
        self._poll_id = self.after(self.CHANGE_POLL_MS, self.on_txt_changes)

    def on_open(self, file_to_open=None):
        """Load a file into the editfield.

        - reads the file (UTF-8) and replaces the content of the editfield
        - generates the reference hash used to track text changes
        - clears the undo/redo stacks
        - brings the cursor to the upper left and shows this position

        Parameter:
        --> file_to_open: complete path of the file to open
        """
        # read first: if this fails the editor keeps its current content
        with codecs.open(file_to_open, 'r', 'utf-8') as editfile:
            content = editfile.read()

        self.opened_file_to_open = file_to_open
        self.opened_filename = os.path.basename(file_to_open)
        self.opened_file_label.configure(text=file_to_open)

        self.text.delete('1.0', tk.END)
        self.text.insert('1.0', content)

        # reference hash for a comparison to track text changes
        self.hash_opened_filename = hash(self.text.get('1.0', tk.END))

        self.text.edit_reset()                  # clear tk's undo/redo stacks
        self.text.focus_set()                   # focus to textfield
        self.text.mark_set(tk.INSERT, '1.0')    # place cursor to upper left
        self.text.see(tk.INSERT)                # and display this line

    def on_save(self):
        """Save the currently edited file (UTF-8) into the project folder.

        Shows an info dialog if no file has been opened yet.
        """
        if not self.opened_filename:
            showinfo('Globby Text Editor', 'No File to save \n\n'
                     'You need to choose a File before editing')
            return

        print("on_safe_")
        print(" self.opened_filename %s" % self.opened_filename)

        path_to_safe_file = os.path.join(self.settings.projects_path,
                                         self.settings.current_project,
                                         self.opened_filename)
        # 'end-1c' leaves out the newline Tk always keeps at the end of a
        # text widget; writing it would add one more empty line to the file
        # on every open/save cycle.
        with codecs.open(path_to_safe_file, 'w', 'utf-8') as safefile:
            safefile.write(self.text.get('1.0', 'end-1c'))

        # only a successfully written text becomes the new reference
        self.hash_opened_filename = hash(self.text.get('1.0', tk.END))
        self.text.edit_reset()                  # clear tk's undo/redo stacks

    def on_undo(self):
        """Undo the last edit; show an info dialog if there is nothing to undo."""
        try:
            self.text.edit_undo()
        except tk.TclError:                     # raised if the stack is empty
            showinfo('Globby Text Editor', 'Nothing to undo')

    def on_redo(self):
        """Redo the last undone edit; show an info dialog if there is none."""
        print("redo")
        try:
            self.text.edit_redo()
        except tk.TclError:                     # raised if the stack is empty
            showinfo('Globby Text Editor', 'Nothing to redo')

    def on_new_page(self):
        """Ask for a file name, create that file and load it into the editor.

        Nothing happens if the dialog is cancelled.  An existing file is only
        overwritten after confirmation.

        TODO: check if the file name contains special chars
        """
        print("on_new_page")
        nfile_name = tkSimpleDialog.askstring("New File Name",
                                              "Fill in a new File Name")
        if nfile_name is None:                  # dialog was cancelled
            return
        nfile_name = nfile_name.strip()
        if not nfile_name:
            showinfo('Globby Text Editor', 'No File Name given')
            return

        current_project = self.settings.current_project
        proj_path = os.path.join(self.settings.projects_path, current_project)
        nfile_name = os.path.join(proj_path, nfile_name + '.txt')

        if os.path.exists(nfile_name) and not askyesno(
                'Globby Text Editor',
                'File already exists. Overwrite?\n\n' + nfile_name):
            return

        infostring1 = u'# Diese Datei wurde automatisch mit '
        infostring2 = u'dem Projekt "%s" erstellt' % current_project
        with codecs.open(nfile_name, 'w', 'utf-8') as nfile:
            nfile.write(infostring1 + infostring2)

        self.on_open(nfile_name)

    def on_del_page(self):
        """Delete the currently opened file after confirmation.

        After the deletion no file counts as opened any more, so a following
        "save" does not silently recreate the deleted file.
        """
        print("del page")
        if not self.opened_filename:
            showinfo('Globby Text Editor', 'No File to delete')
            return

        del_file = os.path.join(self.settings.projects_path,
                                self.settings.current_project,
                                self.opened_filename)
        if not askyesno("Do you really want to delete ", del_file):
            return

        try:
            os.remove(del_file)
        except OSError as err:
            showinfo('Globby Text Editor',
                     'Could not delete file\n\n%s' % err)
            return
        print("%s geloescht" % del_file)
        self.opened_filename = None
        self.opened_file_label.configure(text="No File chosen")

    def on_tag_insert(self, event=None, open_tag=None, close_tag=None):
        """Insert a (wiki)tag at the current cursor position.

        If no text is selected, open_tag and close_tag are inserted behind
        each other at the cursor and the cursor is placed in between.
        Otherwise the selected text is replaced by itself enclosed in
        open_tag and close_tag, and the cursor is placed right behind the
        inserted string.

        idea: - new parameter for on_tag_insert()
                jump_in_between=True/False so a pulldown list for the
                different levels of headlines isn't necessary

        Parameter:
        --> event:     Tk event if called by a keyboard shortcut, else None
        --> open_tag:  string
        --> close_tag: string

        Returns "break", which stops Tk from running the default text-widget
        binding of the shortcut afterwards (the return value is ignored when
        called from a button or menu entry).
        """
        open_tag = open_tag or ''
        close_tag = close_tag or ''

        if self.text.tag_ranges(tk.SEL):
            sel_first = self.text.index(tk.SEL_FIRST)
            sel_last = self.text.index(tk.SEL_LAST)
            marked_text = self.text.get(sel_first, sel_last)
            print("replace_index in selected %s" % sel_first)
            self.text.delete(sel_first, sel_last)
            # Text inserted at the insert mark moves the mark behind it, so
            # the cursor ends right behind the closing tag.
            self.text.mark_set(tk.INSERT, sel_first)
            self.text.insert(tk.INSERT, open_tag + marked_text + close_tag)
        else:
            print("no String is selected")
            self.text.insert(tk.INSERT, open_tag)
            # The cursor now sits behind open_tag.  Remember that position
            # instead of calculating it: this also works for tags containing
            # newlines (headlines) or multi-byte characters.
            between_tags = self.text.index(tk.INSERT)
            self.text.insert(tk.INSERT, close_tag)
            self.text.mark_set(tk.INSERT, between_tags)

        # display the cursor position on the editfield
        self.text.see(tk.INSERT)
        return 'break'
class ClustererGui(ttk.Frame):
    """GUI to open/save xml/text-files and visualize clustering."""

    def __init__(self, master=None):
        """Init GUI.

        Reads the auto-split-sentences option and the standard test-file
        folder from section "params" of "config.cfg" in the working
        directory.
        """
        ttk.Frame.__init__(self, master)
        self.grid(sticky=tk.N + tk.S + tk.E + tk.W)
        self.createWidgets()

        self.filepath = None
        self.xml_filepath = None
        self.filename = None
        self.article_id = None
        self.extraction = None
        self.author_no = None
        self.correct = None
        self.result = None
        self.colors = []
        # results of the last clustering run, filled by extract_features
        self.clusters = None
        self.believe_score = None
        self.scores = None
        # worker thread of the running/last extraction and its failure
        self.ftr_extr_thread = None
        self.extraction_error = None

        config = ConfigParser.ConfigParser()
        config.read("config.cfg")
        params = dict(config.items("params"))
        article_dir = params['test_file_dir']
        self.auto_split_sentences = bool(int(params['auto_split_sentences']))
        # the option is still parsed (so a broken config is noticed), but
        # it is currently not supported in GUI-mode and therefore forced off
        self.show_knee_point = bool(int(params['show_knee_point']))
        self.show_knee_point = False
        self.last_dir = article_dir

    def createWidgets(self):
        """Organize GUI.

        Left: button row above the main text field.  Right: text fields for
        the author distribution and the scores.  Bottom: status bar with
        progress bar and status text.
        """
        fill_all = tk.N + tk.S + tk.E + tk.W

        top = self.winfo_toplevel()
        top.rowconfigure(0, weight=1)
        top.columnconfigure(0, weight=1)
        self.rowconfigure(0, weight=1)
        self.rowconfigure(1, weight=0)
        self.columnconfigure(0, weight=1)
        self.columnconfigure(1, weight=0)

        left_frame = ttk.Frame(self, relief="raised", borderwidth=1)
        left_frame.grid(row=0, column=0, sticky=fill_all)
        left_frame.rowconfigure(0, weight=0)
        left_frame.rowconfigure(1, weight=1)
        left_frame.columnconfigure(0, weight=1)

        buttons_topleft = ttk.Frame(left_frame)
        buttons_topleft.grid(row=0, column=0)

        self.choose_file_btn = ttk.Button(buttons_topleft,
                                          text='choose file...',
                                          command=self.choose_file)
        self.choose_file_btn.grid(row=0, column=0)

        self.save_file_btn = ttk.Button(buttons_topleft,
                                        text='save file...',
                                        command=self.save_file)
        self.save_file_btn.grid(row=0, column=1)

        self.extract_feat_btn = ttk.Button(
            buttons_topleft, text='process',
            command=self.start_featureextr_thread)
        self.extract_feat_btn.grid(row=0, column=2)

        right_frame = ttk.Frame(self)
        right_frame.grid(row=0, column=1, sticky=fill_all)
        right_frame.rowconfigure(0, weight=1)
        right_frame.rowconfigure(1, weight=1)

        self.distr_entry = ScrolledText(right_frame, width=30, height=30)
        self.distr_entry.grid(row=0, column=0, columnspan=2, sticky=tk.N)

        self.test_entry = ScrolledText(right_frame, width=30)
        self.test_entry.grid(row=1, column=0, columnspan=2, sticky=tk.N)

        self.scrolledText = ScrolledText(left_frame, undo=True, wrap=tk.WORD)
        self.scrolledText['font'] = ('Helvetica', '12')
        self.scrolledText.tag_configure('lines', background="#dddddd",
                                        foreground="black",
                                        font=('Helvetica', 9))
        self.scrolledText.tag_configure('blanks', background="#ffffff",
                                        foreground="black",
                                        font=('Helvetica', 9))
        self.scrolledText.grid(row=1, column=0, sticky=fill_all)

        status_bar = ttk.Frame(self)
        status_bar.grid(row=1, column=0, columnspan=2, sticky=tk.W)
        status_bar.columnconfigure(0, weight=1, minsize=100)
        status_bar.columnconfigure(1, weight=1)

        self.status = tk.StringVar()
        self.status.set("ready")
        self.status_label = ttk.Label(status_bar, textvariable=self.status)
        self.status_label.grid(row=0, column=1, padx=10)

        self.progressbar = ttk.Progressbar(status_bar, mode='indeterminate',
                                           length=200)
        self.progressbar.grid(row=0, column=0, padx=3)

    def choose_file(self):
        """Choose text or xml file dialog.

        For an xml-file the raw text is written to a new text-file first and
        that text-file becomes the current file.  Cancelling the dialog
        leaves the current file untouched.
        """
        chosen = askopenfilename(
            initialdir=self.last_dir,
            filetypes=(("text and xml files", ("*.txt", "*.xml")), ))
        if not chosen:
            # dialog cancelled: do not overwrite the current filepath
            return

        self.filepath = chosen
        if os.path.splitext(chosen)[1].lower() == ".xml":
            # save raw-text of xml-file to a new file and print it
            self.xml_filepath = chosen
            self.filepath = self.create_text_fromXML()
        else:
            # a plain text file has no xml-file; drop the one of a
            # previously chosen file
            self.xml_filepath = None

        # the name of the folder containing the file is used as article id
        base, self.filename = os.path.split(self.filepath)
        self.article_id = os.path.split(base)[1]

        self.scrolledText.delete('1.0', tk.END)
        self.print_raw_text()
        self.scrolledText.edit_reset()

    def create_text_fromXML(self):
        """Create text-file out of given xml-file.

        Writes the text of every child of the xml root element that has
        text, separated by blank lines, to a UTF-8 file beside the xml-file
        (same name, extension ".txt").  Returns the path of the new file.
        """
        new_filepath = os.path.splitext(self.filepath)[0] + ".txt"
        # the parser gets the path, so it reads the bytes itself and honours
        # the encoding declared in the xml-file
        xml_tree = etree.parse(self.filepath)
        texts = [entry.text for entry in xml_tree.getroot()
                 if entry.text is not None]
        with codecs.open(new_filepath, 'w', 'UTF-8') as new_file:
            new_file.write("\n\n".join(texts))
        return new_filepath

    def save_file(self):
        """Save text-file-dialog.

        Writes the stripped content of the main text field to the current
        file (UTF-8); asks for a file name if there is no current file.
        Returns True after saving, None if the dialog was cancelled.
        """
        text = self.scrolledText.get('1.0', tk.END)
        if not self.filepath:
            name = asksaveasfilename(initialdir=self.last_dir,
                                     defaultextension=".txt")
            if not name:
                return
            self.filepath = name

        with codecs.open(self.filepath, 'w', 'UTF-8') as new_file:
            new_file.write(text.strip())
        # an empty undo stack marks the text as unchanged, see
        # start_featureextr_thread
        self.scrolledText.edit_reset()
        base, self.filename = os.path.split(self.filepath)
        self.article_id = os.path.split(base)[1]
        return True

    def start_featureextr_thread(self):
        """Start thread for feature extraction.

        Nothing is started if there is no saved file, if the text was
        changed since it was loaded/saved, or while a previous extraction is
        still running.
        """
        if (self.ftr_extr_thread is not None
                and self.ftr_extr_thread.is_alive()):
            return

        self.distr_entry.delete('1.0', tk.END)
        if self.filepath is None or self.article_id is None:
            tkMessageBox.showwarning("Save File",
                                     "Save file for feature extraction.")
            return

        # Loading and saving reset the undo stack, so a successful undo
        # means the text was changed afterwards.
        try:
            self.scrolledText.edit_undo()
        except tk.TclError:
            pass                                # nothing to undo: unchanged
        else:
            self.scrolledText.edit_redo()       # restore the undone edit
            tkMessageBox.showwarning("File changed",
                                     "File was changed, please save.")
            return

        self.extraction = clusterer.Clusterer(self.article_id,
                                              self.filepath,
                                              self.xml_filepath,
                                              self.auto_split_sentences,
                                              self.show_knee_point)
        self.extraction_error = None
        self.ftr_extr_thread = threading.Thread(target=self.extract_features)
        self.ftr_extr_thread.daemon = True
        # set only now: the early returns above must not leave the status
        # at "processing..."
        self.status.set("processing...")
        self.progressbar.start()
        self.ftr_extr_thread.start()
        self.after(1000, self.check_feat_thread)

    def check_feat_thread(self):
        """Check if feature extraction thread is still working.

        While it is working the check is repeated every second; afterwards
        the cluster-results are visualized, or a warning is shown if the
        extraction failed.
        """
        if self.ftr_extr_thread.is_alive():
            self.after(1000, self.check_feat_thread)
            return

        self.progressbar.stop()
        self.status.set("ready")
        if self.extraction_error is not None:
            tkMessageBox.showwarning("Processing failed",
                                     str(self.extraction_error))
            return

        # generate author-colormap
        # NOTE(review): indexing self.colors by cluster label assumes the
        # labels are 0..n-1 without gaps -- confirm against the clusterer.
        # NOTE(review): recent matplotlib versions reportedly no longer
        # provide cm.spectral (renamed nipy_spectral) -- confirm.
        labels = set(self.clusters)
        self.colors = [None] * len(labels)
        label_range = np.max(self.clusters) + 1
        for k in labels:
            if k == 0:
                temp_color = plt.cm.spectral(0.05)
            else:
                temp_color = plt.cm.spectral(float(k) / label_range)
            self.colors[k] = self.convert_to_hex(temp_color)
        self.configure_colors()

        self.print_author_distr()
        self.print_text()

        parts = ["authors found: {}".format(len(labels)),
                 "\n believe-score: {:.4f}".format(self.believe_score)]
        if self.correct is not None and self.author_no is not None:
            parts += [
                "\n\n true number of authors: {}".format(self.author_no),
                "\n precision: {:.4f}".format(self.scores[0]),
                "\n recall: {:.4f}".format(self.scores[1]),
                "\n f1-score: {:.4f}".format(self.scores[2]),
                "\n adjusted-rand-index: {:.4f}".format(self.scores[3]),
            ]
        self.test_entry.delete('1.0', tk.END)
        self.test_entry.insert(tk.INSERT, "".join(parts))

    def extract_features(self):
        """Start feature extraction (runs in the worker thread).

        Stores the values returned by calc_cluster() and, if there is a
        result list, the share of True entries in percent (self.correct).
        An exception is stored in self.extraction_error; check_feat_thread
        reports it from the GUI thread.
        """
        try:
            (self.clusters, self.result, self.author_no,
             self.believe_score, self.scores) = self.extraction.calc_cluster()
            self.correct = None
            if self.result is not None:
                c = Counter(self.result)
                total = sum(c.values())
                if total:
                    # 100.0 forces float division also on Python 2
                    self.correct = 100.0 * c[True] / total
        except Exception as err:
            # boundary of the worker thread: without this the failure would
            # only show up as a traceback on stderr
            self.extraction_error = err

    def print_text(self):
        """Print raw text with specified author-colors.

        Every non-blank line gets a prefix with its running number and its
        cluster, and is tagged with its cluster.  Lines without a cluster
        are printed untagged.
        """
        self.scrolledText.delete('1.0', tk.END)
        line_number = 0                 # counts non-blank lines only
        with open(self.filepath) as text_file:
            for line in text_file:
                if not line.strip():
                    self.scrolledText.insert(tk.INSERT, line, 'blanks')
                    continue

                if line_number < len(self.clusters):
                    cluster_tag = str(self.clusters[line_number])
                else:
                    cluster_tag = None

                line_cluster = (str(line_number) + ' '
                                + (cluster_tag or '') + ' ')
                # a negative width is not a valid format specification
                width = max(14 - len(line_cluster), 0)
                line_cluster = ('{:^' + str(width) + '}').format(line_cluster)
                self.scrolledText.insert(tk.INSERT, line_cluster, 'lines')

                if cluster_tag is None:
                    self.scrolledText.insert(tk.INSERT, line)
                else:
                    self.scrolledText.insert(tk.INSERT, line, cluster_tag)
                line_number += 1

    def print_raw_text(self):
        """Print raw text."""
        with open(self.filepath) as text_file:
            for line in text_file:
                self.scrolledText.insert(tk.INSERT, line)

    def get_distribution(self, l=None):
        """Return Counter with author distribution in percent.

        l is a list of cluster labels (default: self.clusters).  The
        returned Counter maps every label to its share of l in percent.
        """
        if l is None:
            l = self.clusters
        counter = Counter(l)
        sum_counter = sum(counter.values())
        # list(): iterate over a snapshot of the keys while the values are
        # replaced; 100.0 forces float division also on Python 2
        for key in list(counter):
            counter[key] = 100.0 * counter[key] / sum_counter
        return counter

    def print_author_distr(self):
        """Print author distribution with specified author-colors."""
        self.distr_entry.delete('1.0', tk.END)
        distr = self.get_distribution(self.clusters)
        for index, count in distr.most_common():
            author_i = "author " + str(index) + "{:>20}%\n".format(
                locale.format_string(u'%.2f', count))
            self.distr_entry.insert(tk.INSERT, author_i, str(index))

    def convert_to_hex(self, col):
        """Convert color tuple to hex-coded string.

        The first three entries of col are read as red, green and blue,
        scaled by 255 and truncated to int, e.g. (1.0, 0.5, 0.0) gives
        '#ff7f00'.  Further entries are ignored.
        """
        red = int(col[0] * 255)
        green = int(col[1] * 255)
        blue = int(col[2] * 255)
        return '#{r:02x}{g:02x}{b:02x}'.format(r=red, g=green, b=blue)

    def configure_colors(self):
        """Configure author-specific colors.

        Color i of self.colors becomes the background of tag str(i) in the
        author-distribution and in the cluster-results.
        """
        for i, c in enumerate(self.colors):
            self.scrolledText.tag_configure(str(i), background=c,
                                            foreground="white")
            self.distr_entry.tag_configure(str(i), background=c,
                                           foreground="white")
class ClustererGui(ttk.Frame):
    """GUI to open/save xml/text-files and visualize clustering."""

    def __init__(self, master=None):
        """Init GUI.

        Reads the auto-split-sentences option and the standard test-file
        folder from section "params" of "config.cfg" in the working
        directory.
        """
        ttk.Frame.__init__(self, master)
        self.grid(sticky=tk.N + tk.S + tk.E + tk.W)
        self.createWidgets()

        self.filepath = None
        self.xml_filepath = None
        self.filename = None
        self.article_id = None
        self.extraction = None
        self.author_no = None
        self.correct = None
        self.result = None
        self.colors = []
        # results of the last clustering run, filled by extract_features
        self.clusters = None
        self.believe_score = None
        self.scores = None
        # worker thread of the running/last extraction and its failure
        self.ftr_extr_thread = None
        self.extraction_error = None

        config = ConfigParser.ConfigParser()
        config.read("config.cfg")
        params = dict(config.items("params"))
        article_dir = params['test_file_dir']
        self.auto_split_sentences = bool(int(params['auto_split_sentences']))
        # the option is still parsed (so a broken config is noticed), but
        # it is currently not supported in GUI-mode and therefore forced off
        self.show_knee_point = bool(int(params['show_knee_point']))
        self.show_knee_point = False
        self.last_dir = article_dir

    def createWidgets(self):
        """Organize GUI.

        Left: button row above the main text field.  Right: text fields for
        the author distribution and the scores.  Bottom: status bar with
        progress bar and status text.
        """
        fill_all = tk.N + tk.S + tk.E + tk.W

        top = self.winfo_toplevel()
        top.rowconfigure(0, weight=1)
        top.columnconfigure(0, weight=1)
        self.rowconfigure(0, weight=1)
        self.rowconfigure(1, weight=0)
        self.columnconfigure(0, weight=1)
        self.columnconfigure(1, weight=0)

        left_frame = ttk.Frame(self, relief="raised", borderwidth=1)
        left_frame.grid(row=0, column=0, sticky=fill_all)
        left_frame.rowconfigure(0, weight=0)
        left_frame.rowconfigure(1, weight=1)
        left_frame.columnconfigure(0, weight=1)

        buttons_topleft = ttk.Frame(left_frame)
        buttons_topleft.grid(row=0, column=0)

        self.choose_file_btn = ttk.Button(buttons_topleft,
                                          text='choose file...',
                                          command=self.choose_file)
        self.choose_file_btn.grid(row=0, column=0)

        self.save_file_btn = ttk.Button(buttons_topleft,
                                        text='save file...',
                                        command=self.save_file)
        self.save_file_btn.grid(row=0, column=1)

        self.extract_feat_btn = ttk.Button(
            buttons_topleft, text='process',
            command=self.start_featureextr_thread)
        self.extract_feat_btn.grid(row=0, column=2)

        right_frame = ttk.Frame(self)
        right_frame.grid(row=0, column=1, sticky=fill_all)
        right_frame.rowconfigure(0, weight=1)
        right_frame.rowconfigure(1, weight=1)

        self.distr_entry = ScrolledText(right_frame, width=30, height=30)
        self.distr_entry.grid(row=0, column=0, columnspan=2, sticky=tk.N)

        self.test_entry = ScrolledText(right_frame, width=30)
        self.test_entry.grid(row=1, column=0, columnspan=2, sticky=tk.N)

        self.scrolledText = ScrolledText(left_frame, undo=True, wrap=tk.WORD)
        self.scrolledText['font'] = ('Helvetica', '12')
        self.scrolledText.tag_configure('lines', background="#dddddd",
                                        foreground="black",
                                        font=('Helvetica', 9))
        self.scrolledText.tag_configure('blanks', background="#ffffff",
                                        foreground="black",
                                        font=('Helvetica', 9))
        self.scrolledText.grid(row=1, column=0, sticky=fill_all)

        status_bar = ttk.Frame(self)
        status_bar.grid(row=1, column=0, columnspan=2, sticky=tk.W)
        status_bar.columnconfigure(0, weight=1, minsize=100)
        status_bar.columnconfigure(1, weight=1)

        self.status = tk.StringVar()
        self.status.set("ready")
        self.status_label = ttk.Label(status_bar, textvariable=self.status)
        self.status_label.grid(row=0, column=1, padx=10)

        self.progressbar = ttk.Progressbar(status_bar, mode='indeterminate',
                                           length=200)
        self.progressbar.grid(row=0, column=0, padx=3)

    def choose_file(self):
        """Choose text or xml file dialog.

        For an xml-file the raw text is written to a new text-file first and
        that text-file becomes the current file.  Cancelling the dialog
        leaves the current file untouched.
        """
        chosen = askopenfilename(
            initialdir=self.last_dir,
            filetypes=(("text and xml files", ("*.txt", "*.xml")), ))
        if not chosen:
            # dialog cancelled: do not overwrite the current filepath
            return

        self.filepath = chosen
        if os.path.splitext(chosen)[1].lower() == ".xml":
            # save raw-text of xml-file to a new file and print it
            self.xml_filepath = chosen
            self.filepath = self.create_text_fromXML()
        else:
            # a plain text file has no xml-file; drop the one of a
            # previously chosen file
            self.xml_filepath = None

        # the name of the folder containing the file is used as article id
        base, self.filename = os.path.split(self.filepath)
        self.article_id = os.path.split(base)[1]

        self.scrolledText.delete('1.0', tk.END)
        self.print_raw_text()
        self.scrolledText.edit_reset()

    def create_text_fromXML(self):
        """Create text-file out of given xml-file.

        Writes the text of every child of the xml root element that has
        text, separated by blank lines, to a UTF-8 file beside the xml-file
        (same name, extension ".txt").  Returns the path of the new file.
        """
        new_filepath = os.path.splitext(self.filepath)[0] + ".txt"
        # the parser gets the path, so it reads the bytes itself and honours
        # the encoding declared in the xml-file
        xml_tree = etree.parse(self.filepath)
        texts = [entry.text for entry in xml_tree.getroot()
                 if entry.text is not None]
        with codecs.open(new_filepath, 'w', 'UTF-8') as new_file:
            new_file.write("\n\n".join(texts))
        return new_filepath

    def save_file(self):
        """Save text-file-dialog.

        Writes the stripped content of the main text field to the current
        file (UTF-8); asks for a file name if there is no current file.
        Returns True after saving, None if the dialog was cancelled.
        """
        text = self.scrolledText.get('1.0', tk.END)
        if not self.filepath:
            name = asksaveasfilename(initialdir=self.last_dir,
                                     defaultextension=".txt")
            if not name:
                return
            self.filepath = name

        with codecs.open(self.filepath, 'w', 'UTF-8') as new_file:
            new_file.write(text.strip())
        # an empty undo stack marks the text as unchanged, see
        # start_featureextr_thread
        self.scrolledText.edit_reset()
        base, self.filename = os.path.split(self.filepath)
        self.article_id = os.path.split(base)[1]
        return True

    def start_featureextr_thread(self):
        """Start thread for feature extraction.

        Nothing is started if there is no saved file, if the text was
        changed since it was loaded/saved, or while a previous extraction is
        still running.
        """
        if (self.ftr_extr_thread is not None
                and self.ftr_extr_thread.is_alive()):
            return

        self.distr_entry.delete('1.0', tk.END)
        if self.filepath is None or self.article_id is None:
            tkMessageBox.showwarning("Save File",
                                     "Save file for feature extraction.")
            return

        # Loading and saving reset the undo stack, so a successful undo
        # means the text was changed afterwards.
        try:
            self.scrolledText.edit_undo()
        except tk.TclError:
            pass                                # nothing to undo: unchanged
        else:
            self.scrolledText.edit_redo()       # restore the undone edit
            tkMessageBox.showwarning("File changed",
                                     "File was changed, please save.")
            return

        self.extraction = clusterer.Clusterer(self.article_id,
                                              self.filepath,
                                              self.xml_filepath,
                                              self.auto_split_sentences,
                                              self.show_knee_point)
        self.extraction_error = None
        self.ftr_extr_thread = threading.Thread(target=self.extract_features)
        self.ftr_extr_thread.daemon = True
        # set only now: the early returns above must not leave the status
        # at "processing..."
        self.status.set("processing...")
        self.progressbar.start()
        self.ftr_extr_thread.start()
        self.after(1000, self.check_feat_thread)

    def check_feat_thread(self):
        """Check if feature extraction thread is still working.

        While it is working the check is repeated every second; afterwards
        the cluster-results are visualized, or a warning is shown if the
        extraction failed.
        """
        if self.ftr_extr_thread.is_alive():
            self.after(1000, self.check_feat_thread)
            return

        self.progressbar.stop()
        self.status.set("ready")
        if self.extraction_error is not None:
            tkMessageBox.showwarning("Processing failed",
                                     str(self.extraction_error))
            return

        # generate author-colormap
        # NOTE(review): indexing self.colors by cluster label assumes the
        # labels are 0..n-1 without gaps -- confirm against the clusterer.
        # NOTE(review): recent matplotlib versions reportedly no longer
        # provide cm.spectral (renamed nipy_spectral) -- confirm.
        labels = set(self.clusters)
        self.colors = [None] * len(labels)
        label_range = np.max(self.clusters) + 1
        for k in labels:
            if k == 0:
                temp_color = plt.cm.spectral(0.05)
            else:
                temp_color = plt.cm.spectral(float(k) / label_range)
            self.colors[k] = self.convert_to_hex(temp_color)
        self.configure_colors()

        self.print_author_distr()
        self.print_text()

        parts = ["authors found: {}".format(len(labels)),
                 "\n believe-score: {:.4f}".format(self.believe_score)]
        if self.correct is not None and self.author_no is not None:
            parts += [
                "\n\n true number of authors: {}".format(self.author_no),
                "\n precision: {:.4f}".format(self.scores[0]),
                "\n recall: {:.4f}".format(self.scores[1]),
                "\n f1-score: {:.4f}".format(self.scores[2]),
                "\n adjusted-rand-index: {:.4f}".format(self.scores[3]),
            ]
        self.test_entry.delete('1.0', tk.END)
        self.test_entry.insert(tk.INSERT, "".join(parts))

    def extract_features(self):
        """Start feature extraction (runs in the worker thread).

        Stores the values returned by calc_cluster() and, if there is a
        result list, the share of True entries in percent (self.correct).
        An exception is stored in self.extraction_error; check_feat_thread
        reports it from the GUI thread.
        """
        try:
            (self.clusters, self.result, self.author_no,
             self.believe_score, self.scores) = self.extraction.calc_cluster()
            self.correct = None
            if self.result is not None:
                c = Counter(self.result)
                total = sum(c.values())
                if total:
                    # 100.0 forces float division also on Python 2
                    self.correct = 100.0 * c[True] / total
        except Exception as err:
            # boundary of the worker thread: without this the failure would
            # only show up as a traceback on stderr
            self.extraction_error = err

    def print_text(self):
        """Print raw text with specified author-colors.

        Every non-blank line gets a prefix with its running number and its
        cluster, and is tagged with its cluster.  Lines without a cluster
        are printed untagged.
        """
        self.scrolledText.delete('1.0', tk.END)
        line_number = 0                 # counts non-blank lines only
        with open(self.filepath) as text_file:
            for line in text_file:
                if not line.strip():
                    self.scrolledText.insert(tk.INSERT, line, 'blanks')
                    continue

                if line_number < len(self.clusters):
                    cluster_tag = str(self.clusters[line_number])
                else:
                    cluster_tag = None

                line_cluster = (str(line_number) + ' '
                                + (cluster_tag or '') + ' ')
                # a negative width is not a valid format specification
                width = max(14 - len(line_cluster), 0)
                line_cluster = ('{:^' + str(width) + '}').format(line_cluster)
                self.scrolledText.insert(tk.INSERT, line_cluster, 'lines')

                if cluster_tag is None:
                    self.scrolledText.insert(tk.INSERT, line)
                else:
                    self.scrolledText.insert(tk.INSERT, line, cluster_tag)
                line_number += 1

    def print_raw_text(self):
        """Print raw text."""
        with open(self.filepath) as text_file:
            for line in text_file:
                self.scrolledText.insert(tk.INSERT, line)

    def get_distribution(self, l=None):
        """Return Counter with author distribution in percent.

        l is a list of cluster labels (default: self.clusters).  The
        returned Counter maps every label to its share of l in percent.
        """
        if l is None:
            l = self.clusters
        counter = Counter(l)
        sum_counter = sum(counter.values())
        # list(): iterate over a snapshot of the keys while the values are
        # replaced; 100.0 forces float division also on Python 2
        for key in list(counter):
            counter[key] = 100.0 * counter[key] / sum_counter
        return counter

    def print_author_distr(self):
        """Print author distribution with specified author-colors."""
        self.distr_entry.delete('1.0', tk.END)
        distr = self.get_distribution(self.clusters)
        for index, count in distr.most_common():
            author_i = "author " + str(index) + "{:>20}%\n".format(
                locale.format_string(u'%.2f', count))
            self.distr_entry.insert(tk.INSERT, author_i, str(index))

    def convert_to_hex(self, col):
        """Convert color tuple to hex-coded string.

        The first three entries of col are read as red, green and blue,
        scaled by 255 and truncated to int, e.g. (1.0, 0.5, 0.0) gives
        '#ff7f00'.  Further entries are ignored.
        """
        red = int(col[0] * 255)
        green = int(col[1] * 255)
        blue = int(col[2] * 255)
        return '#{r:02x}{g:02x}{b:02x}'.format(r=red, g=green, b=blue)

    def configure_colors(self):
        """Configure author-specific colors.

        Color i of self.colors becomes the background of tag str(i) in the
        author-distribution and in the cluster-results.
        """
        for i, c in enumerate(self.colors):
            self.scrolledText.tag_configure(str(i), background=c,
                                            foreground="white")
            self.distr_entry.tag_configure(str(i), background=c,
                                           foreground="white")