def __call__(self, texpart, *args, **kwargs):
    '''Wrap every ``\\item`` of a list in explicit start/end markers.

    An ``\\item`` has no terminator of its own (the last one in particular),
    so each item is rewritten as ``\\startitem ... \\enditem`` -- per the
    original author's note, so that it can be handled by process_inout --
    and the result is then formatted as ``<li>`` elements.

    texpart -- the part to rewrite; its ``text_data`` is replaced and
               ``update_text`` is called on it three times.
    *args, **kwargs -- accepted and ignored.

    ``self.count`` is incremented once per item found.
    '''
    # Nested lists: itemize / enumerate have to be processed before the
    # items themselves are looked at.
    nested_dict = dict(
        (env, begin_dict[env]) for env in ('itemize', 'enumerate')
    )
    texpart.no_update_text = False
    texpart.update_text(nested_dict)

    # An item runs lazily up to (not including) the next \item or the end
    # of the string.
    item_re = re.compile(r'\\item ([\w\W]*?)(?=(\\item|$))')

    # Only raw strings are searched; everything else is passed through.
    # NOTE(review): textools.re_search presumably returns a mix of plain
    # strings and match objects -- the loop below relies on that.
    pieces = []
    for chunk in texpart.text_data:
        if type(chunk) in (str, unicode):
            pieces.extend(textools.re_search(item_re, chunk))
        else:
            pieces.append(chunk)

    rebuilt = []
    for piece in pieces:
        if type(piece) not in (str, unicode, texlib.TexPart):
            # anything that is not text or a TexPart is treated as a match
            self.count += 1
            item_text = piece.group(1)
            # update_text above should already have consumed nested lists
            assert r'\end{itemize}' not in item_text
            rebuilt.append(r'\startitem ' + item_text + r'\enditem ')
        else:
            rebuilt.append(piece)

    texpart.text_data = texlib.reform_text(rebuilt, no_indicators=True)

    item_format = tp(add_outside=('<li>', '</li>'), no_outer_pgraphs=True)
    line_items = [['item', item_format]]
    item_dict = build_dict('list_call', line_items,
                           r'\\start{0} ', None, r'\\end{0}')
    texpart.update_text(use_dict=item_dict)
    texpart.update_text()
def update_formatting(self):
    '''Re-run the current regexp over the editor text and redraw the result.

    Steps, in order:
      1. strip the previously applied highlighting from the widget's html
         and work out the cursor position in the unformatted text
         (the "true position"),
      2. refresh the replace-groups model when the regexp compiles,
      3. either report an error and show the plain text, or show the
         match / replace preview as html,
      4. put the cursor back where it was.

    On success ``self._researched`` and ``self._html_list`` are stored.
    ``self._disable_signals`` is True while the method runs and is always
    reset to False on exit, including when one of the helpers raises.

    The debug prints use the single-argument parenthesised form so that the
    output is the same as the old print statements on Python 2 while still
    being valid syntax on Python 3.
    '''
    self._disable_signals = True
    try:
        self.clear_error()
        rsearch_rtext = researched_richtext
        self._update = False

        qtpos = self.get_text_cursor_pos()  # visible pos
        raw_html = self.getHtml()

        # We need the "True Position", i.e. the position without our
        # formats added in.
        deformated = richtext.deformat_html(
            raw_html,
            (richtext.KEEPIF["black-bold"],
             richtext.KEEPIF["red-underlined-bold"]),
        )
        deformated_str = richtext.get_str_formated_true(deformated)
        true_pos = richtext.get_position(
            deformated, visible_position=qtpos)[0]

        regexp = self.get_regexp()
        try:
            re.compile(regexp)
        except Exception:
            # Deliberately best-effort: the groups model is simply left
            # untouched here; a bad pattern is reported to the user by the
            # search further down.
            pass
        else:
            self.Replace_groups_model.set_groups(
                textools.get_regex_groups(regexp))

        error = None
        # These patterns slow things down a lot and are not really useful,
        # so just display an error instead of the matches.
        if regexp == ".":
            error = "'.' -- Matches everything, not displayed"
        elif regexp == r"\w":
            error = r"'\w' -- Matches all characters, not displayed"
        elif regexp == "":
            error = ("'' -- Results not displayed, matches between every"
                     " character.")
        else:
            try:
                researched = textools.re_search(regexp, deformated_str)
            except re.error as exc:
                error = str(exc)
            else:
                # NOTE(review): a single plain-string element presumably
                # means re_search returned the input untouched -- confirm
                # against textools.
                if len(researched) == 1 and isinstance(researched[0], str):
                    error = "No Match Found"

        if error:
            print(error)
            self.set_error(error)
            # Original author's note: "believe it or not, setText will add
            # formating! have to explicitly set html".
            self.setText(deformated_str)
            print("er setting pos %s" % (true_pos,))
            self.set_text_cursor_pos(true_pos, no_anchor=True)
            return

        # Set the html to the correct values.
        if self.Radio_match.isChecked():
            print("doing match")
            html_list = rsearch_rtext.re_search_format_html(researched)
        else:
            print("doing replace")
            rlist = self.get_replace()
            replaced = textools.re_search_replace(
                researched, rlist, preview=True)
            html_list = rsearch_rtext.re_search_format_html(replaced)

        raw_html = richtext.get_str_formated_html(html_list)
        self.setHtml(raw_html)

        # Translate the unformatted position back into the new html.
        visible_pos = richtext.get_position(
            html_list, true_position=true_pos)[1]
        print("new visible pos %s" % (visible_pos,))
        self.set_text_cursor_pos(visible_pos, no_anchor=True)

        self._researched = researched
        self._html_list = html_list
    finally:
        # Never leave signals disabled, even if a helper above raised.
        self._disable_signals = False
def get_text_data(text_objects, texpart_constructor, return_first = False):
    '''Primary function for converting data into TexParts.

    Inputs:
        text_objects - list of strings and TexParts; must already have been
            formatted by reform_text
        texpart_constructor - the constructor to use, normally defined in a
            list in wp_formatting.py.  Its ``match_re`` attribute supplies
            three pattern lists: (inside, starters, end).
        return_first - only used by the get_document function; returns only
            the first object found.

    Output:
        the fully created text_data that is held in all TexPart objects
        (whatever convert_inout returns).

    Internally every piece of text is tagged and collected in a list such as

        (2, txt3),       # piece was the first inside start of a group
        (True, txt1),
        (True, txt2),
        (True, TexPart),
        (3, txt4),       # piece was the final end of a group
        (False, txt2),
        (False, TexPart),
        ...

    where True means the piece lies inside the match parameters and False
    means it lies outside.

    Precedence: the inside list beats the starter list, and the starter list
    beats the end list -- something that matches "inside" is never tested
    against "starter", and so on.  Keep the "inside" patterns specific and
    avoid special re constructs such as ``.*``.

    A starter embedded in an inside counts as inside.  For instance in
        /iffalse /ifblog no hello to world /fi /fi
    the ifblog is inside of the /iffalse.
    '''
    inside_patterns, starter_patterns, end_patterns = \
        texpart_constructor.match_re
    matches_any = textools.re_in

    # One alternation of every pattern; only used to cut the strings apart.
    splitter = re.compile(
        '|'.join(inside_patterns + starter_patterns + end_patterns))

    # Split the plain strings; anything else is carried over untouched.
    pieces = []
    for obj in text_objects:
        if type(obj) == str:
            found = textools.re_search(splitter, obj)
            pieces.extend(textools.get_iter_str_researched(found))
        else:
            pieces.append(obj)

    inside_res = [re.compile(pattern) for pattern in inside_patterns]
    starter_res = [re.compile(pattern) for pattern in starter_patterns]
    end_res = [re.compile(pattern) for pattern in end_patterns]

    # TODO: It has to match arbitrary if statements.  I think this should be
    # pretty easy.
    depth = 0       # how many groups are currently open
    tagged = []
    for piece in pieces:
        assert depth >= 0
        if piece in (None, ''):
            continue

        boundary = None     # 2 = opens the outermost group, 3 = closes it
        if type(piece) == TexPart:
            # TexParts have already been processed.
            pass
        elif matches_any(piece, inside_res):
            if depth == 0:
                boundary = 2
            depth += 1
        elif depth > 0 and matches_any(piece, starter_res):
            # i.e. if you wrote something like /iffalse /ifblog
            depth += 1
        elif depth > 0 and matches_any(piece, end_res):
            # ends are only counted while a group is open
            depth -= 1
            if depth == 0:
                boundary = 3

        if boundary:
            tag = boundary
        else:
            tag = depth > 0
        tagged.append((tag, piece))

    return convert_inout(tagged, texpart_constructor,
                         return_first=return_first)
def init_node_researched(self, node):
    '''Fill in ``node.researched`` from the node's file, at most once.

    If ``node.researched`` is already set (anything other than None,
    including an empty result) the node is left untouched and the file is
    not opened.  Otherwise the file at ``node.full_path`` is read and the
    result of ``textools.re_search(self._regexp_text, text)`` is stored on
    the node.

    node -- object with ``researched`` and ``full_path`` attributes.

    Returns None.  Errors from ``open``/``read`` are not caught here.
    '''
    # Identity test: a cached value must never be able to influence this
    # check through its own comparison operators.
    if node.researched is not None:
        return
    with open(node.full_path) as f:
        text = f.read()
    node.researched = textools.re_search(self._regexp_text, text)