Ejemplo n.º 1
0
	def create_link(self, source, target):
		'''Pick the most suitable link form for linking C{source} to C{target}

		Tried in order: a floating link on the basename when both paths
		are equal, a relative link when C{target.ischild(source)} is
		true, whatever C{_find_floating_link()} returns, and finally an
		absolute link as the fallback.

		@param source: a L{Path} object
		@param target: a L{Path} object
		@returns: a L{HRef} object
		'''
		# Page linking to itself - weird edge case
		if target == source:
			return HRef(HREF_REL_FLOATING, target.basename)

		if target.ischild(source):
			return HRef(HREF_REL_RELATIVE, target.relname(source))

		# Prefer a floating link when one is found, else use the full name
		floating = self._find_floating_link(source, target)
		if floating:
			return floating
		return HRef(HREF_REL_ABSOLUTE, target.name)
Ejemplo n.º 2
0
	def on_finish_update(self, o):
		'''Bring the links table in a consistent state after an update

		Runs three passes over the database:
		  1. pages without a C{source_file} that still have outgoing
		     links ("ghost links") are logged and passed to
		     C{on_page_row_deleted()}
		  2. every link flagged C{needscheck=1} is resolved to a target
		     page id; when no target exists a placeholder page is
		     inserted and used instead
		  3. placeholder pages that have no children and are no longer
		     the target of any link are removed

		@param o: not used by this method
		'''
		# Check for ghost links - warn but still clean them up
		# Rows are fetched up front, before on_page_row_deleted() is
		# called - presumably because that call modifies the links table
		for row in self.db.execute('''
			SELECT DISTINCT pages.* FROM pages INNER JOIN links ON pages.id=links.source
			WHERE pages.source_file IS NULL
		''').fetchall():
			logger.warning('Found ghost links from: %s', row['name'])
			self.on_page_row_deleted(None, row)

		# Resolve pending links
		for row in self.db.execute(
			'SELECT * FROM links WHERE needscheck=1 '
			'ORDER BY anchorkey, names'
		):
			href = HRef(row['rel'], row['names'])
			source = self._pages.get_pagename(row['source'])
			target_id, targetname = self._pages.resolve_link(source, href)
			if target_id is None:
				target_id = self._pagesindexer.insert_link_placeholder(targetname)

			# The target was resolved from "rel" + "names", so the row must
			# be matched on "rel" as well - otherwise links from the same
			# source with equal names but a different link type would
			# overwrite each other's target
			self.db.execute(
				'UPDATE links SET target=?, needscheck=? WHERE source=? and names=? and rel=?',
				(target_id, False, row['source'], row['names'], row['rel'])
			)

		# Delete un-used placeholders
		for row in self.db.execute('''
			SELECT pages.id FROM pages LEFT JOIN links ON pages.id=links.target
			WHERE pages.is_link_placeholder=1 and pages.n_children=0 and links.source IS NULL
		'''):
			pagename = self._pages.get_pagename(row['id'])
			self._pagesindexer.remove_page(pagename, self._allow_cleanup)
Ejemplo n.º 3
0
    def runTest(self):
        # Exercises link resolving and the links indexer against an
        # in-memory database filled from self.PAGES.
        # (Kept as comments on purpose: a docstring would change the
        # description unittest prints for this test.)
        def basename(name):
            # Last ':'-separated part, or the full name when there is no ':'
            return name.rsplit(":", 1)[-1]

        db = sqlite3.connect(':memory:')
        db.row_factory = sqlite3.Row
        pi = PagesIndexer(db, None, tests.MockObject())
        for page_id, pagename, _ in self.PAGES:
            values = (
                page_id,
                pagename,
                basename(pagename).lower(),
                natural_sort_key(pagename),
            )
            db.execute(
                'INSERT INTO pages(id, name, lowerbasename, sortkey, parent, source_file) VALUES (?, ?, ?, ?, 1, 1)',
                values)

        ## Test PagesViewInternal methods
        iview = PagesViewInternal(db)
        found_id, found_path = iview.resolve_pagename(Path(''), ['foo'])
        self.assertEqual((found_id, found_path), (3, Path('Foo')))

        found_id, found_path = iview.resolve_link(
            Path('Foo'), HRef.new_from_wiki_link('Bar'))
        self.assertEqual((found_id, found_path), (2, Path('Bar')))

        ## Test the actual indexer
        pageindexer = tests.MaskedObject(pi, 'connect')
        indexer = LinksIndexer(db, pageindexer)

        # Announce every page to the links indexer
        for page_id, pagename, _ in self.PAGES:
            inserted = {}
            inserted['id'] = page_id
            inserted['name'] = pagename
            inserted['sortkey'] = natural_sort_key(pagename)
            inserted['is_link_placeholder'] = False
            indexer.on_page_row_inserted(pageindexer, inserted)

        ### Index the page content and check the resulting links
        pageindexer.setObjectAccess('insert_link_placeholder')
        for page_id, pagename, wikitext in self.PAGES:
            parsetree = WikiParser().parse(wikitext)
            indexer.on_page_changed(
                pageindexer, {'id': page_id, 'name': pagename}, parsetree)

        indexer.update()

        links = [
            (r['source'], r['target'])
            for r in db.execute('SELECT * FROM links')
        ]
        links.sort()
        self.assertEqual(links, [(3, 2), (3, 4)])

        ### Delete all pages again, no links should be left behind
        pageindexer.setObjectAccess('remove_page')
        for page_id, pagename, _ in self.PAGES:
            deleted = {
                'id': page_id,
                'name': pagename,
                'is_link_placeholder': False,
            }
            indexer.on_page_row_deleted(pageindexer, deleted)

        indexer.update()

        self.assertEqual(db.execute('SELECT * FROM links').fetchall(), [])
Ejemplo n.º 4
0
    def update_iter(self):
        # Check for ghost links - warn but still clean them up
        for row in self.db.execute('''
			SELECT DISTINCT pages.* FROM pages INNER JOIN links ON pages.id=links.source
			WHERE pages.source_file IS NULL
		''').fetchall():
            logger.warn('Found ghost links from: %s', row['name'])
            self.on_page_row_deleted(None, row)
            yield

        # Check total
        n, = self.db.execute(
            'SELECT COUNT(*) FROM links WHERE needscheck=1').fetchone()

        # Resolve pending links
        for i, row in enumerate(
                self.db.execute('SELECT * FROM links WHERE needscheck=1 '
                                'ORDER BY anchorkey, names')):
            if i > 0 and i % 100 == 0:
                self.db.commit()
                logger.debug('Update link %i of %i', i, n)

            href = HRef(row['rel'], row['names'])
            source = self._pages.get_pagename(row['source'])
            target_id, targetname = self._pages.resolve_link(
                source, href, source_id=row['source'])
            if target_id is None:
                target_id = self._pagesindexer.insert_link_placeholder(
                    targetname)

            self.db.execute(
                'UPDATE links SET target=?, needscheck=0 WHERE source=? and names=? and rel=?',
                (target_id, row['source'], row['names'], row['rel']))
            yield

        # Delete un-used placeholders
        for row in self.db.execute('''
			SELECT pages.id FROM pages LEFT JOIN links ON pages.id=links.target
			WHERE pages.is_link_placeholder=1 and pages.n_children=0 and links.source IS NULL
		'''):
            pagename = self._pages.get_pagename(row['id'])
            self._pagesindexer.remove_page(pagename, self._allow_cleanup)
            yield

            # The allow_cleanup function checks whether a parent has links or not.
            # Without this guard function we would need to iterate several times
            # through this cleanup function.

        self.db.commit()
Ejemplo n.º 5
0
	def iter_href(self):
		'''Generator for the page links found in the text

		Walks all C{LINK} elements followed by all C{IMAGE} elements and
		looks at their "href" attribute. Each distinct href string is
		considered only once; hrefs for which C{link_type()} does not
		return C{'page'}, or that C{HRef.new_from_wiki_link()} rejects
		with a C{ValueError}, are skipped.

		@returns: yields L{HRef} objects, one at a time
		'''
		from zim.notebook.page import HRef # XXX
		# NOTE(review): if _etree is an xml.etree ElementTree, getiterator()
		# is a deprecated alias of iter() - confirm before changing
		elements = itertools.chain(
			self._etree.getiterator(LINK),
			self._etree.getiterator(IMAGE),
		)
		visited = set()
		for element in elements:
			target = element.attrib.get('href')
			if not target or target in visited:
				continue

			visited.add(target)
			if link_type(target) == 'page':
				try:
					yield HRef.new_from_wiki_link(target)
				except ValueError:
					# Not a valid page link - skip it
					pass
Ejemplo n.º 6
0
	def lookup_from_user_input(self, name, reference=None):
		'''Lookup a pagename based on user input
		@param name: the user input as string
		@param reference: a L{Path} used as the source page when
		resolving C{name}; required when C{name} is a relative link.
		When not given, C{ROOT_PATH} is used.
		@returns: a L{Path} object for C{name}
		@raises ValueError: when C{name} would reduce to empty string
		after removing all invalid characters, or if C{name} is a
		relative link while no C{reference} page is given.
		@raises IndexNotFoundError: when C{reference} is not indexed
		'''
		# This method re-uses most of resolve_link() but is defined
		# separate because it has a distinct different purpose.
		# Only accidental that we treat user input as links ... ;)
		href = HRef.new_from_wiki_link(name)
		if reference is None and href.rel == HREF_REL_RELATIVE:
			# Call form of raise: valid on both Python 2 and Python 3
			raise ValueError('Got relative page name without parent: %s' % name)

		source = reference or ROOT_PATH
		# Only the resolved name is of interest here, the page id is dropped
		_, pagename = self._pages.resolve_link(
							source, href, ignore_link_placeholders=False)
		return pagename
Ejemplo n.º 7
0
	def on_finish_update(self, o):
		'''Bring the links table in a consistent state after an update

		Runs two passes over the database:
		  1. every link flagged C{needscheck=1} is resolved to a target
		     page id; when no target exists a placeholder page is
		     inserted and used instead
		  2. placeholder pages that have no children and are no longer
		     the target of any link are removed

		@param o: not used by this method
		'''
		# Resolve pending links
		for row in self.db.execute(
			'SELECT * FROM links WHERE needscheck=1 '
			'ORDER BY anchorkey, names'
		):
			href = HRef(row['rel'], row['names'])
			source = self._pages.get_pagename(row['source'])
			target_id, targetname = self._pages.resolve_link(source, href)
			if target_id is None:
				target_id = self._pagesindexer.insert_link_placeholder(targetname)

			# The target was resolved from "rel" + "names", so the row must
			# be matched on "rel" as well - otherwise links from the same
			# source with equal names but a different link type would
			# overwrite each other's target
			self.db.execute(
				'UPDATE links SET target=?, needscheck=? WHERE source=? and names=? and rel=?',
				(target_id, False, row['source'], row['names'], row['rel'])
			)

		# Delete un-used placeholders
		for row in self.db.execute('''
			SELECT pages.id FROM pages LEFT JOIN links ON pages.id=links.target
			WHERE pages.is_link_placeholder=1 and pages.n_children=0 and links.source IS NULL
		'''):
			pagename = self._pages.get_pagename(row['id'])
			self._pagesindexer.remove_page(pagename, self._allow_cleanup)
Ejemplo n.º 8
0
		def try_link(names):
			# Build a floating link from the given name parts and keep it
			# only when it resolves, seen from "source", to "target".
			# Returns the HRef on success, None otherwise.
			assert names
			candidate = HRef(HREF_REL_FLOATING, ':'.join(names))
			_, pagename = self._pages.resolve_link(source, candidate)
			if pagename == target:
				return candidate
			return None