def replacefunc(elt):
	# Rewrite the href of a single link element after pages below
	# "oldroot" moved to "newroot". Closure over: self, page (the page
	# whose content is being updated), oldroot and newroot.
	# Raises zim.formats.VisitorSkip for links that need no change.
	text = elt.attrib['href']
	if link_type(text) != 'page':
		raise zim.formats.VisitorSkip # not a page link - leave as-is

	href = HRef.new_from_wiki_link(text)
	target = self.pages.resolve_link(page, href)
	if target == oldroot:
		# Link points at the moved page itself
		return self._update_link_tag(elt, page, newroot, href)
	elif target.ischild(oldroot):
		# Link points below the moved page - keep the relative part
		newtarget = newroot.child(target.relname(oldroot))
		return self._update_link_tag(elt, page, newtarget, href)
	elif href.rel == HREF_REL_FLOATING \
	and natural_sort_key(href.parts()[0]) == natural_sort_key(oldroot.basename) \
	and page.ischild(oldroot.parent):
		# Floating link whose anchor name matches the moved page and
		# which lives in the same branch - it may now resolve elsewhere
		targetrecord = self.pages.lookup_by_pagename(target)
		if not target.ischild(oldroot.parent) \
		or not targetrecord.exists():
			# An link that was anchored to the moved page,
			# but now resolves somewhere higher in the tree
			# Or a link that no longer resolves
			if len(href.parts()) == 1:
				return self._update_link_tag(elt, page, newroot, href)
			else:
				mynewroot = newroot.child(':'.join(href.parts()[1:]))
				return self._update_link_tag(elt, page, mynewroot, href)

	raise zim.formats.VisitorSkip
def runTest(self):
	# Integration test for LinksIndexer against an in-memory sqlite db:
	# insert the self.PAGES fixtures, index their links, verify the
	# links table, then delete all pages and verify the table empties.

	def basename(name):
		# Last element of a zim page name ("A:B" -> "B")
		if ":" in name:
			return name.split(":")[-1]
		else:
			return name

	db = sqlite3.connect(':memory:')
	db.row_factory = sqlite3.Row
	pi = PagesIndexer(db, None, tests.MockObject())
	for i, name, cont in self.PAGES:
		# parent and source_file are hard-coded to 1 - good enough here
		db.execute(
			'INSERT INTO pages(id, name, lowerbasename, sortkey, parent, source_file) VALUES (?, ?, ?, ?, 1, 1)',
			(i, name, basename(name).lower(), natural_sort_key(name)))

	## Test PagesViewInternal methods
	iview = PagesViewInternal(db)
	i, pn = iview.resolve_pagename(Path(''), ['foo'])
	self.assertEqual((i, pn), (3, Path('Foo'))) # case-insensitive resolve
	i, pn = iview.resolve_link(Path('Foo'), HRef.new_from_wiki_link('Bar'))
	self.assertEqual((i, pn), (2, Path('Bar')))

	## Test the actual indexer
	pageindexer = tests.MaskedObject(pi, 'connect')
	indexer = LinksIndexer(db, pageindexer)

	for i, name, cont in self.PAGES:
		row = {'id': i, 'name': name, 'sortkey': natural_sort_key(name), 'is_link_placeholder': False}
		indexer.on_page_row_inserted(pageindexer, row)

	###
	pageindexer.setObjectAccess('insert_link_placeholder')
	for i, name, text in self.PAGES:
		tree = WikiParser().parse(text)
		row = {'id': i, 'name': name}
		indexer.on_page_changed(pageindexer, row, tree)

	indexer.update()

	links = sorted(
		(r['source'], r['target'])
			for r in db.execute('SELECT * FROM links'))
	self.assertEqual(links, [(3, 2), (3, 4)])

	###
	pageindexer.setObjectAccess('remove_page')
	for i, name, cont in self.PAGES:
		row = {'id': i, 'name': name, 'is_link_placeholder': False}
		indexer.on_page_row_deleted(pageindexer, row)

	indexer.update()

	rows = db.execute('SELECT * FROM links').fetchall()
	self.assertEqual(rows, []) # all links gone with the pages
def sort_by_number_or_string(self, liststore, treeiter1, treeiter2, colid):
	'''Sort algorithm for sorting numbers correctly and
	putting 10 after 3.
	This part can be improved in future to support also currencies,
	dates, floats, etc.
	:param liststore: model of treeview
	:param treeiter1: treeiter 1
	:param treeiter2: treeiter 2
	:param colid: a column number
	:return: -1 / first data is smaller than second, 0 / equality, 1 / else
	'''
	key_a = natural_sort_key(liststore.get_value(treeiter1, colid))
	key_b = natural_sort_key(liststore.get_value(treeiter2, colid))
	# Explicit three-way compare (python3 has no cmp() builtin)
	if key_a < key_b:
		return -1
	elif key_a > key_b:
		return 1
	else:
		return 0
def _db_check(self):
	# Verify the on-disk index is usable: correct schema version and a
	# sortkey sample that matches the current natural_sort_key()
	# behavior (which depends on locale / unicode library version).
	# Re-initializes the database when any check fails.
	try:
		if self.get_property('db_version') != DB_VERSION:
			logger.info('Index db_version out of date')
			self._db_init()
		elif self.get_property('db_sortkey_format') != natural_sort_key(DB_SORTKEY_CONTENT):
			# FIX: compare against the stored sample for DB_SORTKEY_CONTENT,
			# as written by _db_init(); the previous code hashed the literal
			# key name 'db_sortkey_format' instead, so the check could
			# never match what _db_init() stored
			logger.info('Index db_sortkey_format out of date')
			self._db_init()
		else:
			pass # index up to date
	except sqlite3.OperationalError:
		# db is there but table does not exist
		logger.debug('Operational error, init tables')
		self._db_init()
	except sqlite3.DatabaseError:
		assert not self.dbpath == ':memory:'
		logger.warning('Overwriting possibly corrupt database: %s', self.dbpath)
		# NOTE(review): closes "self.db" but assigns "self._db" below -
		# presumably "db" is a property for "_db"; confirm
		self.db.close()
		file = LocalFile(self.dbpath)
		try:
			file.remove()
		except Exception:
			# narrowed from bare "except:" so KeyboardInterrupt/SystemExit
			# still propagate; this path stays best-effort and only logs
			logger.exception('Could not delete: %s', file)
			# TODO: how to recover from this ? - seems fatal
		finally:
			self._db = self._new_connection()
			self._db_init()
def resolve_pagename(self, parent, names, parent_id=None): '''Resolve a pagename in the right case''' # We do not ignore placeholders here. This can lead to a dependencies # in how links are resolved based on order of indexing. However, this # is not really a problem. Ignoring them means you could see duplicates # if the tree for multiple links with slightly different spelling. # Also we would need another call to return the page_id if a resolved # page happens to exist. assert isinstance(parent, Path) pagename = parent page_id = parent_id or self.get_page_id(parent) for i, basename in enumerate(names): sortkey = natural_sort_key(basename) candidates = self.db.execute( 'SELECT id, name FROM pages ' 'WHERE parent=? and sortkey=? ORDER BY name', (page_id, sortkey)).fetchall() exact = pagename.child(basename).name for row in candidates: if row['name'] == exact: pagename = Path(row['name']) page_id = row['id'] break else: if candidates: # case insensitive match(es) row = candidates[0] pagename = Path(row['name']) page_id = row['id'] else: # no match return None, pagename.child(':'.join(names[i:])) else: return page_id, pagename
def _update(self, *a):
	'''Update the cloud to show only tags that share a set of pages
	with the selected tags.
	'''
	tagview = TagsView.new_from_index(self.index)
	selected = []
	for button in self.get_children():
		if button.get_active():
			try:
				selected.append(tagview.lookup_by_tagname(button.indextag))
			except IndexNotFoundError:
				pass
			# Need the lookup here in case the tag went missing in the
			# mean time e.g. due to editing of the page

	self._clear()

	if selected:
		tags = tagview.list_intersecting_tags(selected)
	else:
		tags = tagview.list_all_tags_by_n_pages()

	if self._alphabetically:
		tags = sorted(tags, key=lambda t: natural_sort_key(t.name))
	# else leave sorted by score

	# Rebuild one toggle button per tag, anchored in the text buffer
	buffer = self.get_buffer()
	for tag in tags:
		iter = buffer.get_end_iter()
		anchor = buffer.create_child_anchor(iter)
		button = TagCloudItem(tag)
		button.set_active(tag in selected)
		# Lambda ignores the button argument; _update re-reads all
		# button states itself
		button.connect("toggled", lambda b: self._update())
		self.add_child_at_anchor(button, anchor)

	self.show_all()
	self.emit('selection-changed')
def on_page_changed(self, pagesindexer, pagerow, doc):
	# Sync the tags of one page with its new parse tree: keep tags that
	# are still present, create/link new ones (emitting signals), and
	# finally unlink tags no longer used on the page.
	# Map sortkey -> (sortkey, name, id) for tags currently on the page
	oldtags = dict(
		(r[0], r) for r in self.db.execute(
			'SELECT tags.sortkey, tags.name, tags.id FROM tagsources '
			'LEFT JOIN tags ON tagsources.tag = tags.id '
			'WHERE tagsources.source=?', (pagerow['id'],)))
	seen = set()
	for name in doc.iter_tag_names():
		sortkey = natural_sort_key(name)
		if sortkey in seen:
			continue # duplicate occurrence on the same page
		elif sortkey in oldtags:
			seen.add(sortkey)
			oldtags.pop(sortkey) # still present - don't remove below
		else:
			seen.add(sortkey)
			row = self.db.execute('SELECT * FROM tags WHERE sortkey=?', (sortkey,)).fetchone()
			if not row:
				# Create new tag
				self.db.execute(
					'INSERT INTO tags(name, sortkey) VALUES (?, ?)',
					(name, sortkey))
				row = self.db.execute('SELECT * FROM tags WHERE sortkey=?', (sortkey,)).fetchone()
				assert row
				self.emit('tag-row-inserted', row)

			self.db.execute(
				'INSERT INTO tagsources(source, tag) VALUES (?, ?)',
				(pagerow['id'], row['id']))
			self.emit('tag-added-to-page', row, pagerow)

	# Whatever is left in oldtags disappeared from the page
	for row in list(oldtags.values()):
		self._remove_tag_from_page(row, pagerow)
def _insert_page(self, pagename, is_link_placeholder, file_id=None):
	'''Insert a record for C{pagename}, creating any missing parent
	records recursively. Emits "page-row-inserted" and returns the id
	of the new row.
	'''
	assert not (is_link_placeholder and file_id)

	# Make sure the parent record exists before we reference it
	parent_row = self._select(pagename.parent)
	if parent_row is None:
		self._insert_page(pagename.parent, is_link_placeholder) # recurs
		parent_row = self._select(pagename.parent)
		assert parent_row is not None

	# Add the row for the page itself
	basename = pagename.basename
	self.db.execute(
		'INSERT INTO pages(name, lowerbasename, sortkey, parent, is_link_placeholder, source_file)'
		'VALUES (?, ?, ?, ?, ?, ?)',
		(pagename.name, basename.lower(), natural_sort_key(basename),
			parent_row['id'], is_link_placeholder, file_id))
	row = self._select(pagename)
	self._update_parent_nchildren(pagename.parent)
	self.emit('page-row-inserted', row)

	# update parent(s)
	self.update_parent(pagename.parent)

	return row['id']
def _db_connect(self):
	# Open the sqlite connection for self.dbpath and verify the schema;
	# falls back to _db_recover() for corrupt / unusable databases and
	# to _db_init() for missing or outdated tables.
	# NOTE: for a locked database, different errors happen on linux and
	# on windows, so test both platforms when modifying here
	try:
		self._db = sqlite3.Connection(self.dbpath)
	except Exception:
		# FIX: narrowed from bare "except:" so KeyboardInterrupt and
		# SystemExit are not swallowed; any real connection failure
		# (corrupt or locked file) still triggers recovery
		self._db_recover()

	self._db.row_factory = sqlite3.Row
	try:
		self._db.execute('PRAGMA synchronous=OFF;')
		# Don't wait for disk writes, we can recover from crashes
		# anyway. Allows us to use commit more frequently.

		if self.get_property('db_version') != DB_VERSION:
			logger.info('Index db_version out of date')
			self._db_init()
		elif self.get_property('db_sortkey_format') != natural_sort_key(DB_SORTKEY_CONTENT):
			# Sort keys depend on locale / unicode library; a change
			# invalidates all stored "sortkey" columns
			logger.info('Index db_sortkey_format out of date')
			self._db_init()
		else:
			self.set_property('db_version', DB_VERSION) # Ensure we can write
	except sqlite3.OperationalError:
		# db is there but table does not exist
		logger.debug('Operational error, init tables')
		self._db_init()
	except sqlite3.DatabaseError:
		self._db_recover()
def list_floating_links(self, basename):
	'''Yield an L{IndexLink} for every floating link whose anchor
	matches C{basename} (compared via its natural sort key).
	'''
	cursor = self.db.execute(
		'SELECT DISTINCT source, target FROM links '
		'WHERE rel=? and anchorkey=?',
		(HREF_REL_FLOATING, natural_sort_key(basename)))
	for link_row in cursor:
		target_path = self._pages.get_pagename(link_row['target'])
		source_path = self._pages.get_pagename(link_row['source'])
		yield IndexLink(source_path, target_path)
def on_page_changed(self, o, row, doc):
	# Drop links for this page and add new ones (don't bother
	# determining delta and updating).
	source_id = row['id']
	self.db.execute('DELETE FROM links WHERE source=?', (source_id,))
	for href in doc.iter_href():
		self.db.execute(
			'INSERT INTO links(source, target, rel, names, anchorkey, needscheck) '
			'VALUES (?, ?, ?, ?, ?, ?)',
			(source_id, ROOT_ID, href.rel, href.names,
				natural_sort_key(href.parts()[0]), 1))
def load_versions(self, versions):
	'''Fill the list model with version records, sorted newest first'''
	model = self.get_model()
	model.clear() # Empty for when we update
	# By default sort by rev, descending
	model.set_sort_column_id(self.REV_SORT_COL, gtk.SORT_DESCENDING)
	for entry in versions:
		rev_key = natural_sort_key(entry[0]) # key for REV_SORT_COL
		model.append((rev_key,) + tuple(entry))
def load_versions(self, versions):
	'''Fill the list model with version records, sorted newest first'''
	model = self.get_model()
	model.clear() # Empty for when we update
	# By default sort by rev, descending
	model.set_sort_column_id(self.REV_SORT_COL, Gtk.SortType.DESCENDING)
	for entry in versions:
		rev_key = natural_sort_key(entry[0]) # key for REV_SORT_COL
		model.append((rev_key,) + tuple(entry))
def on_page_changed(self, o, row, doc):
	# Drop links for this page and add new ones (don't bother
	# determining delta and updating).
	source_id = row['id']
	self.db.execute('DELETE FROM links WHERE source=?', (source_id,))
	for href in doc.iter_href(include_anchors=False):
		assert href.parts() # links cannot be only anchor
		anchorkey = natural_sort_key(href.parts()[0])
		try:
			self.db.execute(
				'INSERT INTO links(source, target, rel, names, anchorkey, needscheck) '
				'VALUES (?, ?, ?, ?, ?, ?)',
				(source_id, ROOT_ID, href.rel, href.names, anchorkey, 1))
		except sqlite3.IntegrityError:
			# Log and continue - one bad link should not abort indexing
			logger.exception(
				'Integrity error when inserting link (%d,%d,%d,%s)',
				source_id, ROOT_ID, href.rel, href.names)
def sort_selected_lines(self):
	# Sort the selected lines in natural order; if they are already
	# sorted, reverse them instead. Formatting per line is preserved by
	# sorting (sortkey, parsetree) tuples and re-inserting the trees.
	buffer = self.window.pageview.view.get_buffer()
	try:
		sel_start, sel_end = buffer.get_selection_bounds()
	except ValueError:
		MessageDialog(
			self.window,
			_('Please select more than one line of text, first.')).run()
			# T: Error message in "" dialog, %s will be replaced by application name
		return

	first_lineno = sel_start.get_line()
	last_lineno = sel_end.get_line()

	with buffer.user_action:
		# Get iters for full selection
		iter_end_line = buffer.get_iter_at_line(last_lineno)
		iter_end_line.forward_line() # include \n at end of line
		if iter_end_line.is_end() and not iter_end_line.starts_line():
			# no \n at end of buffer, insert it
			buffer.insert(iter_end_line, '\n')
			iter_end_line = buffer.get_end_iter()
		iter_begin_line = buffer.get_iter_at_line(first_lineno)

		# Make a list of tuples, first element of each tuple is
		# text only sort key (no formatting), second element
		# is parsetree per line
		lines = []
		for line_nr in range(first_lineno, last_lineno + 1):
			start, end = buffer.get_line_bounds(line_nr)
			text = buffer.get_text(start, end)
			tree = buffer.get_parsetree(bounds=(start, end))
			lines.append((natural_sort_key(text), tree))
		#~ logger.debug("Content of selected lines (text, tree): %s", lines)

		# Sort the list of tuples
		sorted_lines = sorted(lines)
		if lines == sorted_lines: # reverse if already sorted
			sorted_lines.reverse()
		#~ logger.debug("Sorted lines: %s", sorted_lines)

		# Replace selection
		buffer.delete(iter_begin_line, iter_end_line)
		for line in sorted_lines:
			buffer.insert_parsetree_at_cursor(line[1])
def sort_selected_lines(self):
	# Sort the selected lines in natural order; if they are already
	# sorted, reverse them instead. Formatting per line is preserved by
	# sorting (sortkey, parsetree) tuples and re-inserting the trees.
	buffer = self.window.pageview.view.get_buffer()
	try:
		sel_start, sel_end = buffer.get_selection_bounds()
	except ValueError:
		MessageDialog(self.ui,
			_('Please select more than one line of text, first.')).run()
			# T: Error message in "" dialog, %s will be replaced by application name
		return

	first_lineno = sel_start.get_line()
	last_lineno = sel_end.get_line()

	with buffer.user_action:
		# Get iters for full selection
		iter_end_line = buffer.get_iter_at_line(last_lineno)
		iter_end_line.forward_line() # include \n at end of line
		if iter_end_line.is_end() and not iter_end_line.starts_line():
			# no \n at end of buffer, insert it
			buffer.insert(iter_end_line, '\n')
			iter_end_line = buffer.get_end_iter()
		iter_begin_line = buffer.get_iter_at_line(first_lineno)

		# Make a list of tuples, first element of each tuple is
		# text only sort key (no formatting), second element
		# is parsetree per line
		lines = []
		for line_nr in range(first_lineno, last_lineno+1):
			start, end = buffer.get_line_bounds(line_nr)
			text = buffer.get_text(start, end)
			tree = buffer.get_parsetree(bounds=(start, end))
			lines.append((natural_sort_key(text), tree))
		#~ logger.debug("Content of selected lines (text, tree): %s", lines)

		# Sort the list of tuples
		sorted_lines = sorted(lines)
		if lines == sorted_lines: # reverse if already sorted
			sorted_lines.reverse()
		#~ logger.debug("Sorted lines: %s", sorted_lines)

		# Replace selection
		buffer.delete(iter_begin_line, iter_end_line)
		for line in sorted_lines:
			buffer.insert_parsetree_at_cursor(line[1])
def on_page_changed(self, o, row, doc):
	# Drop links for this page and add new ones (don't bother
	# determining delta and updating). Targets that are not indexed yet
	# get a placeholder page record.
	self.db.execute('DELETE FROM links WHERE source=?', (row['id'],))
	pagename = Path(row['name'])
	for href in doc.iter_href():
		target_id, targetname = self._pages.resolve_link(pagename, href)
		if target_id is None:
			target_id = self._pagesindexer.insert_link_placeholder(targetname)
		self.db.execute(
			'INSERT INTO links(source, target, rel, names, anchorkey) '
			'VALUES (?, ?, ?, ?, ?)',
			(row['id'], target_id, href.rel, href.names,
				natural_sort_key(href.parts()[0])))
def get_previous(self, path): '''Get the previous path in the index, in the same order that L{walk()} will yield them @param path: a L{Path} object @returns: a L{Path} object or C{None} if {path} is the first page in the index ''' # Find last (grand)child of previous item with same parent # If no previous item, yield parent if path.isroot: raise ValueError('Can\'t use root') r = self.db.execute( 'SELECT parent FROM pages WHERE name=?', (path.name,) ).fetchone() if r is None: raise IndexNotFoundError('No such page: %s' % path) else: parent_id = r[0] sortkey = natural_sort_key(path.basename) r = self.db.execute(''' SELECT * FROM pages WHERE parent=? and ( sortkey<? or (sortkey=? and name<?) ) ORDER BY sortkey DESC, name DESC LIMIT 1''', (parent_id, sortkey, sortkey, path.name) ).fetchone() if not r: parent = self._pages.get_pagename(parent_id) return None if parent.isroot else parent else: while r['n_children'] > 0: r = self.db.execute( 'SELECT * FROM pages WHERE parent=? ' 'ORDER BY sortkey DESC, name DESC LIMIT 1', (r['id'],) ).fetchone() if r is None: raise IndexConsistencyError('Missing children') else: return PageIndexRecord(r)
def resolve_pagename(self, parent, names):
	'''Resolve a pagename in the right case'''
	# We do not ignore placeholders here. This can lead to a dependencies
	# in how links are resolved based on order of indexing. However, this
	# is not really a problem. Ignoring them means you could see duplicates
	# if the tree for multiple links with slightly different spelling.
	# Also we would need another call to return the page_id if a resolved
	# page happens to exist.
	pagename = parent
	page_id = self.get_page_id(parent)
	for i, basename in enumerate(names):
		if page_id == ROOT_ID:
			# Top-level page: full name equals the basename
			row = self.db.execute(
				'SELECT id, name FROM pages WHERE name=?', (basename,)
			).fetchone()
		else:
			# FIX: compare the full child name with "=" instead of
			# 'name LIKE "%:"+basename' - LIKE is case-insensitive for
			# ASCII in sqlite and treats "%" / "_" in basename as
			# wildcards, so the old query was not an exact match at all
			row = self.db.execute(
				'SELECT id, name FROM pages WHERE parent=? and name=?',
				(page_id, pagename.child(basename).name)
			).fetchone()
		if row: # exact match
			pagename = Path(row['name'])
			page_id = row['id']
		else:
			# Fall back to a case-insensitive match via the sortkey
			sortkey = natural_sort_key(basename)
			row = self.db.execute(
				'SELECT id, name FROM pages '
				'WHERE parent=? and sortkey=? ORDER BY name',
				(page_id, sortkey)
			).fetchone()
			if row: # case insensitive match
				pagename = Path(row['name'])
				page_id = row['id']
			else: # no match
				return None, pagename.child(':'.join(names[i:]))
	else:
		return page_id, pagename
def _db_init(self):
	# (Re-)create the index schema from scratch: drop every user table,
	# then store the metadata records that _db_check/_db_connect verify.
	tables = [
		r[0] for r in self._db.execute(
			'SELECT name FROM sqlite_master '
			'WHERE type="table" and name NOT LIKE "sqlite%"')
	]
	for table in tables:
		# Table names come from sqlite_master itself, not user input,
		# so string interpolation is safe here (DROP cannot take "?")
		self._db.execute('DROP TABLE %s' % table)

	logger.debug('(Re-)Initializing database for index')
	# %r interpolates the trusted constants as quoted SQL literals;
	# executescript() does not support "?" parameters
	self._db.executescript('''
		CREATE TABLE zim_index (
			key TEXT,
			value TEXT,
			CONSTRAINT uc_MetaOnce UNIQUE (key)
		);
		INSERT INTO zim_index VALUES ('db_version', %r);
		INSERT INTO zim_index VALUES ('db_sortkey_format', %r)
		''' % (DB_VERSION, natural_sort_key(DB_SORTKEY_CONTENT)))

	self._update_iter_init() # Force re-init of all tables
	self._db.commit()
def sort_selected_lines(self):
	# Sort the selected lines in natural order; if they are already
	# sorted, reverse them instead. Formatting per line is preserved by
	# sorting (sortkey, parsetree) tuples and re-inserting the trees.
	buffer = self.window.pageview.view.get_buffer()
	first_lineno, last_lineno = self._get_selected_lines(buffer)
	if first_lineno == last_lineno:
		# Sorting needs at least two lines
		raise NoSelectionError()

	with buffer.user_action:
		# Get iters for full selection
		iter_end_line = buffer.get_iter_at_line(last_lineno)
		iter_end_line.forward_line() # include \n at end of line
		if iter_end_line.is_end() and not iter_end_line.starts_line():
			# no \n at end of buffer, insert it
			buffer.insert(iter_end_line, '\n')
			iter_end_line = buffer.get_end_iter()
		iter_begin_line = buffer.get_iter_at_line(first_lineno)

		# Make a list of tuples, first element of each tuple is
		# text only sort key (no formatting), second element
		# is parsetree per line
		lines = []
		for line_nr in range(first_lineno, last_lineno + 1):
			start, end = buffer.get_line_bounds(line_nr)
			text = buffer.get_text(start, end)
			tree = buffer.get_parsetree(bounds=(start, end))
			lines.append((natural_sort_key(text), tree))
		#~ logger.debug("Content of selected lines (text, tree): %s", lines)

		# Sort the list of tuples
		sorted_lines = sorted(lines)
		if lines == sorted_lines: # reverse if already sorted
			sorted_lines.reverse()
		#~ logger.debug("Sorted lines: %s", sorted_lines)

		# Replace selection
		buffer.delete(iter_begin_line, iter_end_line)
		for line in sorted_lines:
			buffer.insert_parsetree_at_cursor(line[1])
def _insert_page(self, pagename, is_link_placeholder, file_id=None):
	'''Insert a record for C{pagename}, creating any missing parent
	records recursively. Emits "page-row-inserted" and returns the id
	of the new row.
	'''
	assert not (is_link_placeholder and file_id)

	# Make sure the parent record exists before we reference it
	parent_row = self._select(pagename.parent)
	if parent_row is None:
		self._insert_page(pagename.parent, is_link_placeholder) # recurs
		parent_row = self._select(pagename.parent)
		assert parent_row is not None

	# update table
	self.db.execute(
		'INSERT INTO pages(name, sortkey, parent, is_link_placeholder, source_file)'
		'VALUES (?, ?, ?, ?, ?)',
		(pagename.name, natural_sort_key(pagename.basename),
			parent_row['id'], is_link_placeholder, file_id))
	self.update_parent(pagename.parent)

	# notify others
	row = self._select(pagename)
	self.emit('page-row-inserted', row)

	return row['id']
def _insert_page(self, pagename, is_link_placeholder, file_id=None): assert not (is_link_placeholder and file_id) # insert parents parent_row = self._select(pagename.parent) if parent_row is None: self._insert_page(pagename.parent, is_link_placeholder) # recurs parent_row = self._select(pagename.parent) assert parent_row is not None # insert new page lowerbasename = pagename.basename.lower() sortkey = natural_sort_key(pagename.basename) try: self.db.execute( 'INSERT INTO pages(name, lowerbasename, sortkey, parent, is_link_placeholder, source_file)' 'VALUES (?, ?, ?, ?, ?, ?)', (pagename.name, lowerbasename, sortkey, parent_row['id'], is_link_placeholder, file_id)) except sqlite3.IntegrityError: # This can occur in rare edge cases when resolve_page failed to # see a page existed already - typically due to locale changes # affecting sortkey logger.exception('Error while inserting page - re-index needed?') self.db.execute('UPDATE pages SET sortkey=? WHERE name=?', (sortkey, pagename.name)) row = self._select(pagename) else: row = self._select(pagename) self._update_parent_nchildren(pagename.parent) self.emit('page-row-inserted', row) # update parent(s) self.update_parent(pagename.parent) return row['id']
def resolve_link(self, source, href, ignore_link_placeholders=True):
	# Resolve a link from page "source" to a (page_id, Path) tuple.
	# Absolute links resolve from the root; relative links from the
	# source page; floating links search upward through the namespaces
	# for an existing page matching the anchor name.
	if href.rel == HREF_REL_ABSOLUTE or source.isroot:
		return self.resolve_pagename(ROOT_PATH, href.parts())

	start, relnames = source, []
	while True:
		# Do not assume source exists, find start point that does
		try:
			self.get_page_id(start)
		except IndexNotFoundError:
			relnames.append(start.basename)
			start = start.parent
		else:
			break

	if href.rel == HREF_REL_RELATIVE:
		return self.resolve_pagename(start, relnames + href.parts())
	else:
		# HREF_REL_FLOATING
		# Search upward namespaces for existing pages,
		# By default ignore link placeholders to avoid circular
		# dependencies between links and placeholders
		assert href.rel == HREF_REL_FLOATING
		anchor_key = natural_sort_key(href.parts()[0])

		if relnames:
			# Check if we are anchored in non-existing part
			# FIX: materialize the keys - map() returns a one-shot
			# iterator in python3, so the "in" test below would exhaust
			# it and the enumerate() scan would then see nothing
			keys = [natural_sort_key(n) for n in relnames]
			if anchor_key in keys:
				# FIX: was "anchorkey" (NameError) and called
				# resolve_pagename(db, root, ...) with the argument list
				# of an older module-level function; resolve from the
				# existing start point instead
				# NOTE(review): relnames is built deepest-first above -
				# confirm the relnames[:i] reconstruction order against
				# resolve_pagename() expectations
				i = [c for c, k in enumerate(keys) if k == anchor_key][-1]
				return self.resolve_pagename(start, relnames[:i] + href.parts()[1:])

		if ignore_link_placeholders:
			c = self.db.execute(
				'SELECT name FROM pages '
				'WHERE sortkey=? and is_link_placeholder=0 '
				'ORDER BY name DESC', (anchor_key,)) # sort longest first
		else:
			c = self.db.execute(
				'SELECT name FROM pages '
				'WHERE sortkey=? '
				'ORDER BY name DESC', (anchor_key,)) # sort longest first

		maxdepth = source.name.count(':')
		depth = -1 # level where items were found
		found = [] # candidates that match the link - these can only differ in case of the basename
		for name, in c:
			mydepth = name.count(':')
			if mydepth > maxdepth:
				continue
			elif mydepth < depth:
				break

			if mydepth > 0: # check whether we have a common parent
				parentname = name.rsplit(':', 1)[0]
				if start.name.startswith(parentname):
					depth = mydepth
					found.append(name)
			else: # resolve from root namespace
				found.append(name)

		if found: # try to match case first, else just use first match
			parts = href.parts()
			anchor = parts.pop(0)
			for name in found:
				if name.endswith(anchor):
					return self.resolve_pagename(Path(name), parts)
			else:
				return self.resolve_pagename(Path(found[0]), parts)
		else: # Return "brother" of source
			if relnames:
				return self.resolve_pagename(start, relnames[:-1] + href.parts())
			else:
				return self.resolve_pagename(start.parent, href.parts())