Beispiel #1
0
def main():
    """Fill a RelStorage-backed database, pack it and log how long packing took.

    Logging is configured to send DEBUG-level records to stderr.  The
    database is closed on every exit path, including when filling or
    packing raises.
    """
    logging.basicConfig(
        stream=sys.stderr,
        level=logging.DEBUG,
        format='%(asctime)s [%(name)s] %(levelname)s %(message)s')
    log.info("Opening")
    adapter = PostgreSQLAdapter()
    storage = RelStorage(adapter)
    db = DB(storage)
    try:
        log.info("Filling")
        fill_db(db)
        log.info("Packing")
        start = time.time()
        db.pack()
        end = time.time()
        log.info("Packed in %0.3f seconds", end - start)
    finally:
        # Previously the database was left open when the function returned
        # or failed; closing it here releases it in both cases.
        db.close()
Beispiel #2
0
class RealDatabaseTest(unittest.TestCase):
    """Test case base that works against a real FileStorage in a temp dir.

    Every resource created in ``setUp`` gets its own cleanup registered
    right after it is created.
    """

    def setUp(self):
        """Create a temp dir, a ``Data.fs`` storage, a DB and a connection."""
        workdir = tempfile.mkdtemp(prefix='test-zodbbrowser-')
        self.tmpdir = workdir
        self.addCleanup(shutil.rmtree, workdir)

        datafile = os.path.join(workdir, 'Data.fs')
        storage = FileStorage(datafile)
        self.storage = storage
        self.addCleanup(storage.close)

        database = DB(storage)
        self.db = database
        self.addCleanup(database.close)

        connection = database.open()
        self.conn = connection
        self.addCleanup(connection.close)

    def tearDown(self):
        """Abort whatever transaction the test left open."""
        transaction.abort()

    def packDatabase(self):
        """Pack the test database."""
        self.db.pack()
Beispiel #3
0
class DecaWorld:
	"""The DECA world object.

	This object controls the database and the file structure for the DECA world.
	The constructor argument *filename* allows restoring a previously saved world from a file.

	:attribute ID_Repository: Internal layer to store repository: templates, objects
		and shapes. It always exists, and can't be removed.
	:attribute ID_Configuration: Internal pseudo-layer to store configuration data.
		This storage is just the dictionary to save arbitrary data in the world's storage.
		It always exists, and can't be removed.
	"""

	ID_Repository = '@repository'

	ID_Configuration = '@configuration'

	def __init__(self, filename = None, **argsdict):
		"""Open the world archived in *filename*, or create a new empty world.

		The world lives in a fresh temporary directory (``self.wfs``). When the
		archive cannot be extracted or holds no database file, a new directory
		structure is created instead.

		:param filename: path of a previously saved world (zip archive), or *None*
		:type filename: string or None
		:param argsdict: extra keyword arguments handed to ``HgCommon.HgCreate``"""
		self.roots = None
		self.propsGrid = None
		self.Modified = False
		self.Initial = False
		self.db_path = None
		self.layers = {}
		self.Filename = filename
		self.wfs = tempfile.mkdtemp(suffix='.deca')
		if filename is not None:
			# open filesystem
			try:
				archive = zipfile.ZipFile(self.Filename)
				try:
					archive.extractall(self.wfs)
				finally:
					# close the archive even when extraction fails
					archive.close()
				self.db_path = os.path.join(self.wfs, 'database', 'filestorage.sampo')
				if not os.path.exists(self.db_path) :
					self.db_path = None
			except Exception:
				self.db_path = None
		if self.db_path is None :
			# create new world filesystem if file not given, or error occurred
			if filename is None :
				self.Initial = True
			try:
				# create initial structure
				os.makedirs(os.path.join(self.wfs, 'profiles'))
				os.makedirs(self.PixmapsPath)
				os.makedirs(os.path.join(self.wfs, 'database'))
				os.makedirs(self.EnginesPath)
				os.makedirs(os.path.join(self.EnginesPath, 'Default'))	# engines for 'Default' template
				os.makedirs(os.path.join(self.EnginesPath, 'layer'))	# engines for layer
				os.makedirs(self.ReportsPath)
				os.makedirs(self.ResultsPath)
				os.makedirs(self.AttachmentsPath)

				self.db_path = os.path.join(self.wfs, 'database', 'filestorage.sampo')

				# copy the global default profile into the world; both handles
				# are closed by the with-blocks (the source one used to leak)
				cname = os.path.join(ed_glob.CONFIG['GLOBAL_PROFILE_DIR'], 'default.ppb')
				with open(cname, 'rb') as source:
					with open(os.path.join(self.wfs, 'profiles', 'default.ppb'), 'wb') as store:
						store.write(source.read())
			except Exception as cond:
				wx.MessageBox(_("Can't create world's storage!\n%s\nExiting...") % cond, _("Sampo Framework"), wx.OK | wx.CENTER | wx.ICON_ERROR)
				wx.GetApp().Exit()
		# initialize repository filters
		hgignore = os.path.join(self.wfs, '.hgignore')
		if not os.path.exists(hgignore):
			with open(hgignore, 'w') as f:
				f.write('syntax: glob\n')
				f.write('.hg-*\n')
				f.write('profiles\n')
				f.write('database\n')
				f.write('results\n')
				f.write('attachments\n')
				f.write('*.pyc\n')
				f.write('*.pyo\n')
		self._repo = HgCommon.HgCreate(self.wfs, **argsdict)
		# initialize database
		ed_glob.CONFIG['PROFILE_DIR'] = os.path.join(self.wfs, 'profiles')
		# todo: load profile value and switch to remote DB if necessary

		# drop database indexes if platform doesn't match with previous save
		saved_platform = ''
		try:
			with open(os.path.join(self.wfs, 'profiles', 'platform.dat'), 'r') as f:
				saved_platform = f.read()
		except Exception:
			# best effort: a missing or unreadable marker counts as a mismatch
			pass
		if saved_platform != platform.system() :
			self._remove_db_files(('.index', '.tmp'))

		try:
			self.storage = FileStorage(self.db_path)
		except Exception:
			# try to remove indexes and reload base
			self._remove_db_files(('.index', '.tmp', '.lock'))
			self.storage = FileStorage(self.db_path)
		self.odb = DB(self.storage)
		self.connection = self.odb.open()
		self.roots = self.connection.root()
		self.transaction = self.connection.transaction_manager.begin()
		# always create object's repository
		if self.Initial :
			self.roots[self.ID_Repository] = DecaRepoStorage()
			self.roots[self.ID_Configuration] = PersistentMapping()
		self.layers[self.ID_Repository] = DecaRepo(self, self.ID_Repository)
		# world created

	def _remove_db_files(self, suffixes):
		"""Best-effort removal of the database side files ``db_path + suffix``.

		Each file is removed independently, so one missing file no longer
		prevents the removal of the remaining ones.

		:param suffixes: iterable of file name suffixes such as ``'.index'``"""
		if self.db_path is None:
			return
		for suffix in suffixes:
			try:
				os.unlink(self.db_path + suffix)
			except OSError:
				# the file does not exist or can't be removed - ignore
				pass

	def __del__(self):
		self.Destroy()

	def _commit(self):
		"""Commit the current transaction and begin a new one."""
		self.transaction.commit()
		self.transaction = self.connection.transaction_manager.begin()

	def Rollback(self):
		"""Rollback(self)

		Rollback current transaction.

		Undoes all changes in database since last save.
		Note: this operation can't be reversed.

		NOTE(review): the body is currently empty, so calling this method
		changes nothing."""
		pass

	def Destroy(self):
		"""Destroy(self)

		Finalize world usage. Save world and delete the directory tree.
		Only the first call does the work; later calls (for example the one
		made by ``__del__`` after an explicit ``Destroy``) return immediately."""
		if getattr(self, '_destroyed', False):
			return
		self._destroyed = True
		if not self.Initial :
			self.Save()
		self.odb.close()
		self.HgRepository.close()
		shutil.rmtree(self.wfs, True)

	def Save(self, fname = None):
		"""Save(self, String fname = None)

		Flush current world's storage. Stores into the previously given file, or to the
		file given as function parameter. If no file name is given and the world wasn't created
		from a file and was never saved, a diagnostic message is shown instead.

		:param fname: file name to store the world to. May be *None* to use previously given file name
		:type fname: string or None"""
		if self.Initial and fname is None:
			wx.MessageBox(_("Can't save world's storage without path-name"), _("Sampo Framework"), wx.OK | wx.CENTER | wx.ICON_ERROR)
			return

		if self.Initial and fname is not None:
			# move to normal file
			self.Filename = fname
			self.Initial = False
		# Save
		try:
			self._commit()
			self.odb.pack()
			archive = zipfile.ZipFile(self.Filename, 'w')
			try:
				# os.path.walk exists only on Python 2; walk_visit receives (archive, root)
				os.path.walk(self.wfs, walk_visit, (archive, self.wfs))
			finally:
				# close the archive even when adding a file fails
				archive.close()
			self.Modified = False
			# save platform marker
			with open(os.path.join(self.wfs, 'profiles', 'platform.dat'), 'w') as f:
				f.write(platform.system())
		except Exception as cond:
			wx.MessageBox(_("Can't save world's storage!\n%s") % cond, _("Sampo Framework"), wx.OK | wx.CENTER | wx.ICON_ERROR)
		# end of Save

	def GetLayer(self, name):
		"""GetLayer(self, String name)

		Return Deca.Layer with the given name from the world. If a layer with the given name doesn't exist
		a new layer is created.

		:param name: name of the desired layer
		:type name: string or None
		:returns: :class:`Deca.Layer` object"""
		if name in self.layers:
			return self.layers[name]

		if name not in self.roots :
			self.roots[name] = DecaLayerStorage()
		self.layers[name] = DecaLayer(self, name)
		return self.layers[name]

	def GetLayerList(self):
		"""Build list of layers names for layers existing in this world.

		:returns: the list of layers names existing in this world"""
		return self.roots.keys()

	def DeleteLayer(self, name):
		"""DeleteLayer(self, name)

		Removes given layer from world. This operation **destroys all data on this layer** and can't be undone.

		:param name: name of the desired layer
		:type name: string or None"""
		if name in self.layers:
			del self.layers[name]
		if name in self.roots:
			del self.roots[name]

	def GetPropList(self, holder = None) :
		"""Remember *holder* as the properties grid and describe the world.

		:param holder: object stored in ``self.propsGrid``
		:returns: nested ``OrderedDict`` with the number of layers, not counting
			the two entries subtracted for the internal storages"""
		self.propsGrid = holder
		result = OrderedDict([
			("World's properties",  OrderedDict([
				("Layers", len(self.roots) - 2)
			]))
		])
		return result

	def FindObject(self, oid, includeReflections=False):
		"""Search for object(s) with given ID through all layers in the world.

		:param oid: Object identifier
		:param includeReflections: need to include reflections into results? False by default.
		:returns: list of two-values tuples. First value - the object reference, second - the layer name"""
		result = []
		for ln,l in self.roots.items():
			if ln != self.ID_Configuration:
				for oi,oo in l.objects.items():
					if oi == oid and (not oo.IsReflection or includeReflections):
						result.append((oo, ln))
					# check for reflections
				# foreach object
			# not configuration layer
		# foreach layer
		return result

	def GetShapes(self):
		"""List the shapes defined in this world.

		A shape is every ``.py`` file (extension compared case-insensitively)
		found directly in :attr:`ShapesPath`.

		:returns: list of strings with the shape names (file names without extension);
			an empty list when the directory can't be read"""
		try:
			return [os.path.splitext(f)[0] for f in os.listdir(self.ShapesPath) if os.path.splitext(f)[1].lower() == '.py']
		except Exception:
			return []

	@property
	def Configuration(self):
		"""Dictionary of configuration values"""
		return self.roots[self.ID_Configuration]

	@property
	def EnginesPath(self):
		"""String with the OS path where the engines hierarchy is stored"""
		return os.path.join(self.wfs, 'engines')

	@property
	def ReportsPath(self):
		"""String with the OS path where the reports hierarchy is stored"""
		return os.path.join(self.wfs, 'reports')

	@property
	def ResultsPath(self):
		"""String with the OS path where reports results are stored"""
		return os.path.join(self.wfs, 'results')

	@property
	def AttachmentsPath(self):
		"""String with the OS path where reports attachments are stored"""
		return os.path.join(self.wfs, 'attachments')

	@property
	def PixmapsPath(self):
		"""String with the OS path where world-related images are stored"""
		return os.path.join(self.wfs, 'pixmaps')

	@property
	def ShapesPath(self):
		"""String with the OS path where shapes definitions are stored"""
		return os.path.join(self.wfs, 'shapes')

	@property
	def HgRepository(self):
		"""Mercurial repository for source control"""
		return self._repo
Beispiel #4
0
class DecaWorld:
    """The DECA world object.

    This object controls the database and the file structure for the DECA world.
    The constructor argument *filename* allows restoring a previously saved world from a file.

    :attribute ID_Repository: Internal layer to store repository: templates, objects
        and shapes. It always exists, and can't be removed.
    :attribute ID_Configuration: Internal pseudo-layer to store configuration data.
        This storage is just the dictionary to save arbitrary data in the world's storage.
        It always exists, and can't be removed.
    """

    ID_Repository = '@repository'

    ID_Configuration = '@configuration'

    def __init__(self, filename=None, **argsdict):
        """Open the world archived in *filename*, or create a new empty world.

        The world lives in a fresh temporary directory (``self.wfs``). When the
        archive cannot be extracted or holds no database file, a new directory
        structure is created instead.

        :param filename: path of a previously saved world (zip archive), or *None*
        :type filename: string or None
        :param argsdict: extra keyword arguments handed to ``HgCommon.HgCreate``"""
        self.roots = None
        self.propsGrid = None
        self.Modified = False
        self.Initial = False
        self.db_path = None
        self.layers = {}
        self.Filename = filename
        self.wfs = tempfile.mkdtemp(suffix='.deca')
        if filename is not None:
            # open filesystem
            try:
                archive = zipfile.ZipFile(self.Filename)
                try:
                    archive.extractall(self.wfs)
                finally:
                    # close the archive even when extraction fails
                    archive.close()
                self.db_path = os.path.join(self.wfs, 'database',
                                            'filestorage.sampo')
                if not os.path.exists(self.db_path):
                    self.db_path = None
            except Exception:
                self.db_path = None
        if self.db_path is None:
            # create new world filesystem if file not given, or error occurred
            if filename is None:
                self.Initial = True
            try:
                # create initial structure
                os.makedirs(os.path.join(self.wfs, 'profiles'))
                os.makedirs(self.PixmapsPath)
                os.makedirs(os.path.join(self.wfs, 'database'))
                os.makedirs(self.EnginesPath)
                os.makedirs(
                    os.path.join(self.EnginesPath,
                                 'Default'))  # engines for 'Default' template
                os.makedirs(os.path.join(self.EnginesPath,
                                         'layer'))  # engines for layer
                os.makedirs(self.ReportsPath)
                os.makedirs(self.ResultsPath)
                os.makedirs(self.AttachmentsPath)

                self.db_path = os.path.join(self.wfs, 'database',
                                            'filestorage.sampo')

                # copy the global default profile into the world; both handles
                # are closed by the with-blocks (the source one used to leak)
                cname = os.path.join(ed_glob.CONFIG['GLOBAL_PROFILE_DIR'],
                                     'default.ppb')
                target = os.path.join(self.wfs, 'profiles', 'default.ppb')
                with open(cname, 'rb') as source:
                    with open(target, 'wb') as store:
                        store.write(source.read())
            except Exception as cond:
                wx.MessageBox(
                    _("Can't create world's storage!\n%s\nExiting...") % cond,
                    _("Sampo Framework"), wx.OK | wx.CENTER | wx.ICON_ERROR)
                wx.GetApp().Exit()
        # initialize repository filters
        hgignore = os.path.join(self.wfs, '.hgignore')
        if not os.path.exists(hgignore):
            with open(hgignore, 'w') as f:
                f.write('syntax: glob\n')
                f.write('.hg-*\n')
                f.write('profiles\n')
                f.write('database\n')
                f.write('results\n')
                f.write('attachments\n')
                f.write('*.pyc\n')
                f.write('*.pyo\n')
        self._repo = HgCommon.HgCreate(self.wfs, **argsdict)
        # initialize database
        ed_glob.CONFIG['PROFILE_DIR'] = os.path.join(self.wfs, 'profiles')
        # todo: load profile value and switch to remote DB if necessary

        # drop database indexes if platform doesn't match with previous save
        saved_platform = ''
        try:
            marker = os.path.join(self.wfs, 'profiles', 'platform.dat')
            with open(marker, 'r') as f:
                saved_platform = f.read()
        except Exception:
            # best effort: a missing or unreadable marker counts as a mismatch
            pass
        if saved_platform != platform.system():
            self._remove_db_files(('.index', '.tmp'))

        try:
            self.storage = FileStorage(self.db_path)
        except Exception:
            # try to remove indexes and reload base
            self._remove_db_files(('.index', '.tmp', '.lock'))
            self.storage = FileStorage(self.db_path)
        self.odb = DB(self.storage)
        self.connection = self.odb.open()
        self.roots = self.connection.root()
        self.transaction = self.connection.transaction_manager.begin()
        # always create object's repository
        if self.Initial:
            self.roots[self.ID_Repository] = DecaRepoStorage()
            self.roots[self.ID_Configuration] = PersistentMapping()
        self.layers[self.ID_Repository] = DecaRepo(self, self.ID_Repository)
        # world created

    def _remove_db_files(self, suffixes):
        """Best-effort removal of the database side files ``db_path + suffix``.

        Each file is removed independently, so one missing file no longer
        prevents the removal of the remaining ones.

        :param suffixes: iterable of file name suffixes such as ``'.index'``"""
        if self.db_path is None:
            return
        for suffix in suffixes:
            try:
                os.unlink(self.db_path + suffix)
            except OSError:
                # the file does not exist or can't be removed - ignore
                pass

    def __del__(self):
        self.Destroy()

    def _commit(self):
        """Commit the current transaction and begin a new one."""
        self.transaction.commit()
        self.transaction = self.connection.transaction_manager.begin()

    def Rollback(self):
        """Rollback(self)

        Rollback current transaction.

        Undoes all changes in database since last save.
        Note: this operation can't be reversed.

        NOTE(review): the body is currently empty, so calling this method
        changes nothing."""
        pass

    def Destroy(self):
        """Destroy(self)

        Finalize world usage. Save world and delete the directory tree.
        Only the first call does the work; later calls (for example the one
        made by ``__del__`` after an explicit ``Destroy``) return immediately."""
        if getattr(self, '_destroyed', False):
            return
        self._destroyed = True
        if not self.Initial:
            self.Save()
        self.odb.close()
        self.HgRepository.close()
        shutil.rmtree(self.wfs, True)

    def Save(self, fname=None):
        """Save(self, String fname = None)

        Flush current world's storage. Stores into the previously given file, or to the
        file given as function parameter. If no file name is given and the world wasn't created
        from a file and was never saved, a diagnostic message is shown instead.

        :param fname: file name to store the world to. May be *None* to use previously given file name
        :type fname: string or None"""
        if self.Initial and fname is None:
            wx.MessageBox(_("Can't save world's storage without path-name"),
                          _("Sampo Framework"),
                          wx.OK | wx.CENTER | wx.ICON_ERROR)
            return

        if self.Initial and fname is not None:
            # move to normal file
            self.Filename = fname
            self.Initial = False
        # Save
        try:
            self._commit()
            self.odb.pack()
            archive = zipfile.ZipFile(self.Filename, 'w')
            try:
                # os.path.walk exists only on Python 2; walk_visit receives
                # (archive, root)
                os.path.walk(self.wfs, walk_visit, (archive, self.wfs))
            finally:
                # close the archive even when adding a file fails
                archive.close()
            self.Modified = False
            # save platform marker
            marker = os.path.join(self.wfs, 'profiles', 'platform.dat')
            with open(marker, 'w') as f:
                f.write(platform.system())
        except Exception as cond:
            wx.MessageBox(
                _("Can't save world's storage!\n%s") % cond,
                _("Sampo Framework"), wx.OK | wx.CENTER | wx.ICON_ERROR)
        # end of Save

    def GetLayer(self, name):
        """GetLayer(self, String name)

        Return Deca.Layer with the given name from the world. If a layer with the given name doesn't exist
        a new layer is created.

        :param name: name of the desired layer
        :type name: string or None
        :returns: :class:`Deca.Layer` object"""
        if name in self.layers:
            return self.layers[name]

        if name not in self.roots:
            self.roots[name] = DecaLayerStorage()
        self.layers[name] = DecaLayer(self, name)
        return self.layers[name]

    def GetLayerList(self):
        """Build list of layers names for layers existing in this world.

        :returns: the list of layers names existing in this world"""
        return self.roots.keys()

    def DeleteLayer(self, name):
        """DeleteLayer(self, name)

        Removes given layer from world. This operation **destroys all data on this layer** and can't be undone.

        :param name: name of the desired layer
        :type name: string or None"""
        if name in self.layers:
            del self.layers[name]
        if name in self.roots:
            del self.roots[name]

    def GetPropList(self, holder=None):
        """Remember *holder* as the properties grid and describe the world.

        :param holder: object stored in ``self.propsGrid``
        :returns: nested ``OrderedDict`` with the number of layers, not counting
            the two entries subtracted for the internal storages"""
        self.propsGrid = holder
        result = OrderedDict([("World's properties",
                               OrderedDict([("Layers",
                                             len(self.roots) - 2)]))])
        return result

    def FindObject(self, oid, includeReflections=False):
        """Search for object(s) with given ID through all layers in the world.

        :param oid: Object identifier
        :param includeReflections: need to include reflections into results? False by default.
        :returns: list of two-values tuples. First value - the object reference, second - the layer name"""
        result = []
        for ln, l in self.roots.items():
            if ln != self.ID_Configuration:
                for oi, oo in l.objects.items():
                    if oi == oid and (not oo.IsReflection
                                      or includeReflections):
                        result.append((oo, ln))
                    # check for reflections
                # foreach object
            # not configuration layer
        # foreach layer
        return result

    def GetShapes(self):
        """List the shapes defined in this world.

        A shape is every ``.py`` file (extension compared case-insensitively)
        found directly in :attr:`ShapesPath`.

        :returns: list of strings with the shape names (file names without extension);
            an empty list when the directory can't be read"""
        try:
            return [
                os.path.splitext(f)[0] for f in os.listdir(self.ShapesPath)
                if os.path.splitext(f)[1].lower() == '.py'
            ]
        except Exception:
            return []

    @property
    def Configuration(self):
        """Dictionary of configuration values"""
        return self.roots[self.ID_Configuration]

    @property
    def EnginesPath(self):
        """String with the OS path where the engines hierarchy is stored"""
        return os.path.join(self.wfs, 'engines')

    @property
    def ReportsPath(self):
        """String with the OS path where the reports hierarchy is stored"""
        return os.path.join(self.wfs, 'reports')

    @property
    def ResultsPath(self):
        """String with the OS path where reports results are stored"""
        return os.path.join(self.wfs, 'results')

    @property
    def AttachmentsPath(self):
        """String with the OS path where reports attachments are stored"""
        return os.path.join(self.wfs, 'attachments')

    @property
    def PixmapsPath(self):
        """String with the OS path where world-related images are stored"""
        return os.path.join(self.wfs, 'pixmaps')

    @property
    def ShapesPath(self):
        """String with the OS path where shapes definitions are stored"""
        return os.path.join(self.wfs, 'shapes')

    @property
    def HgRepository(self):
        """Mercurial repository for source control"""
        return self._repo
Beispiel #5
0
# Give the first handler the shared formatter and attach it to the logger
# (handler1, formatter and logger are created earlier in the file).
handler1.setFormatter(formatter)
logger.addHandler(handler1)
# When ``sys`` has no "frozen" attribute, additionally log to stdout.
if not hasattr(sys, "frozen"):
    handler2 = logging.StreamHandler(sys.stdout)
    handler2.setFormatter(formatter)
    logger.addHandler(handler2)
# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Application settings stored under organization 'zipta.ru' / APPNAME.
CFG = QtCore.QSettings('zipta.ru', APPNAME)

# The splash text is Russian for "Optimizing the database".
SPLASH.message(u'Оптимизируем базу')

# Open the file storage (with its blob directory) under HOMEDIR, wrap it in
# a database object and pack it right away at import time.
# NOTE(review): days=3 presumably keeps the last three days of history -
# confirm against the ZODB ``DB.pack`` documentation.
storage = FileStorage(os.path.join(HOMEDIR, 'db'),
                      blob_dir=os.path.join(HOMEDIR, 'blobs'))
DB = ZDB(storage)
DB.pack(days=3)


def zodbclose():
    """Close the database first, then the storage it was opened on."""
    for resource in (DB, storage):
        resource.close()


# Make sure the database is closed when the interpreter exits.
atexit.register(zodbclose)
'''
# пример замены метода
try:
    u = __import__('userdata')
    model.add_item = types.MethodType(u.add_item, model)
except ImportError:
    pass
Beispiel #6
0
class Indexer(object):

    filestorage = database = connection = root = None

    def __init__(self, datafs, writable=0, trans=0, pack=0):
        """Open the index database in *datafs* and load (or create) its tables.

        :param datafs: path handed to ``FileStorage``
        :param writable: the storage is opened read-only unless this is true
        :param trans: stored as ``trans_limit``
        :param pack: stored as ``pack_limit``
        """
        self.trans_limit = trans
        self.pack_limit = pack
        self.trans_count = 0
        self.pack_count = 0
        self.stopdict = get_stopdict()
        self.mh = mhlib.MH()
        self.filestorage = FileStorage(datafs, read_only=(not writable))
        self.database = DB(self.filestorage)
        self.connection = self.database.open()
        self.root = self.connection.root()

        def load(key, factory):
            # Return the table stored under *key*, creating and storing a
            # fresh one when the root does not have it yet.
            try:
                return self.root[key]
            except KeyError:
                created = self.root[key] = factory()
                return created

        self.index = load("index", TextIndexWrapper)
        self.docpaths = load("docpaths", IOBTree)
        self.doctimes = load("doctimes", IIBTree)
        self.watchfolders = load("watchfolders", dict)

        # Build the reverse mapping path -> docid from docid -> path.
        self.path2docid = OIBTree()
        for docid, path in self.docpaths.items():
            self.path2docid[path] = docid

        try:
            self.maxdocid = max(self.docpaths.keys())
        except ValueError:
            # max() of an empty sequence: no documents indexed yet
            self.maxdocid = 0

        print(len(self.docpaths), "Document ids")
        print(len(self.path2docid), "Pathnames")
        print(self.index.lexicon.length(), "Words")

    def dumpfreqs(self):
        """Print word id, total frequency and word, highest frequency first."""
        lexicon = self.index.lexicon
        okapi = self.index.index
        assert isinstance(okapi, OkapiIndex)
        rows = sorted(
            (sum(okapi._wordinfo.get(wid, {}).values()),
             wid,
             lexicon.get_word(wid))
            for wid in lexicon.wids())
        # Walk the ascending sort backwards to get descending order.
        for freq, wid, word in reversed(rows):
            print("%10d %10d %s" % (wid, freq, word))

    def dumpwids(self):
        """Print word id, total frequency and word in ``lexicon.wids()`` order."""
        lexicon = self.index.lexicon
        okapi = self.index.index
        assert isinstance(okapi, OkapiIndex)
        for wid in lexicon.wids():
            # Sum the per-document frequencies recorded for this word id.
            total = sum(okapi._wordinfo.get(wid, {}).values())
            print("%10d %10d %s" % (wid, total, lexicon.get_word(wid)))

    def dumpwords(self):
        """Print word id, total frequency and word in ``lexicon.words()`` order."""
        lexicon = self.index.lexicon
        okapi = self.index.index
        assert isinstance(okapi, OkapiIndex)
        for word in lexicon.words():
            wid = lexicon.get_wid(word)
            # Sum the per-document frequencies recorded for this word id.
            total = sum(okapi._wordinfo.get(wid, {}).values())
            print("%10d %10d %s" % (wid, total, word))

    def close(self):
        """Drop the root and close connection, database and storage, in that order.

        Attributes that are already ``None`` are skipped, so the method may
        be called more than once.
        """
        self.root = None
        for attr in ("connection", "database", "filestorage"):
            resource = getattr(self, attr)
            if resource is None:
                continue
            resource.close()
            setattr(self, attr, None)

    def interact(self, nbest=NBEST, maxlines=MAXLINES):
        """Prompt for queries and print the results page by page until EOF.

        A non-empty line starts a new query.  An empty line shows the next
        *nbest* results of the current query.  A line starting with "/" is
        passed on to ``specialcommand``.

        :param nbest: number of results shown per page
        :param maxlines: forwarded to ``formatresults``
        """
        try:
            # Not referenced below; presumably imported for line-editing
            # support in raw_input().
            import readline
        except ImportError:
            pass
        query = ""
        offset = 0
        hits = []
        while True:
            try:
                entered = raw_input("Query: ").strip()
            except EOFError:
                print("\nBye.")
                break
            if entered.startswith("/"):
                self.specialcommand(entered, hits, offset - nbest)
                continue
            if entered:
                query, offset = entered, 0
            elif not query:
                # empty input and no query to continue
                continue
            try:
                hits, total = self.timequery(query, offset + nbest)
            except KeyboardInterrupt:
                raise
            except:
                reportexc()
                query = ""
                continue
            if len(hits) <= offset:
                template = "No more hits for %r." if total else "No hits for %r."
                print(template % query)
                query = ""
                continue
            print("[Results %d-%d from %d" % (offset+1,
                                              min(total, offset+nbest),
                                              total),
                  end=" ")
            print("for query %s]" % repr(query))
            self.formatresults(query, hits, maxlines, offset, offset+nbest)
            offset += nbest

    def specialcommand(self, line, results, first):
        """Display one message from *results* with the ``show`` command.

        :param line: the command line; must start with "/".  "/N" selects
            result number N (1-based), a bare "/" selects index *first*
        :param results: sequence of ``(docid, score)`` pairs
        :param first: index used when no number is given
        """
        assert line.startswith("/")
        spec = line[1:]
        if spec:
            try:
                pos = int(spec) - 1
            except:
                print("Huh?")
                return
        else:
            pos = first
        if not 0 <= pos < len(results):
            print("Out of range")
            return
        docid, score = results[pos]
        path = self.docpaths[docid]
        slash = path.rfind("/")
        assert slash > 0
        folder, msgnum = path[:slash], path[slash+1:]
        cmd = "show +%s %s" % (folder, msgnum)
        if os.getenv("DISPLAY"):
            # DISPLAY is set: page the message in a separate xterm.
            cmd = "xterm -e  sh -c '%s | less' &" % cmd
        os.system(cmd)

    def query(self, text, nbest=NBEST, maxlines=MAXLINES):
        """Run one query and print its first *nbest* results.

        :param text: the query string
        :param nbest: maximum number of results requested
        :param maxlines: forwarded to ``formatresults``
        """
        results, n = self.timequery(text, nbest)
        if n:
            print("[Results 1-%d from %d]" % (len(results), n))
            self.formatresults(text, results, maxlines)
        else:
            print("No hits for %r." % text)

    def timequery(self, text, nbest):
        """Query the index, print how long it took and return the outcome.

        :param text: the query string
        :param nbest: maximum number of results requested from the index
        :returns: the ``(results, n)`` pair produced by ``self.index.query``
        """
        wall_start, cpu_start = time.time(), time.clock()
        outcome = self.index.query(text, 0, nbest)
        wall_end, cpu_end = time.time(), time.clock()
        results, n = outcome
        print("[Query time: %.3f real, %.3f user]" % (wall_end - wall_start,
                                                      cpu_end - cpu_start))
        return results, n

    def formatresults(self, text, results, maxlines=MAXLINES,
                      lo=0, hi=sys.maxint):
        """Print headers and matching lines for ``results[lo:hi]``.

        For every hit the rank, score and path are printed, followed by a
        few headers and up to *maxlines* body lines that contain one of the
        query words.  Each message file is closed once it has been printed.

        :param text: the query string; stop words are dropped and a trailing
            "*" on a word is treated as a glob
        :param results: sequence of ``(docid, score)`` pairs
        :param maxlines: maximum number of matching lines shown per message
        :param lo: index of the first result to show
        :param hi: index after the last result to show
        """
        stopwords = self.stopdict
        words = [w for w in re.findall(r"\w+\*?", text.lower())
                 if w not in stopwords]
        pattern = r"\b(" + "|".join(words) + r")\b"
        pattern = pattern.replace("*", ".*") # glob -> re syntax
        prog = re.compile(pattern, re.IGNORECASE)
        print('='*70)
        rank = lo
        for docid, score in results[lo:hi]:
            rank += 1
            path = self.docpaths[docid]
            score *= 100.0
            print("Rank:    %d   Score: %d%%   File: %s" % (rank, score, path))
            path = os.path.join(self.mh.getpath(), path)
            try:
                fp = open(path)
            except (IOError, OSError) as msg:
                print("Can't open:", msg)
                continue
            try:
                msg = mhlib.Message("<folder>", 0, fp)
                for header in "From", "To", "Cc", "Bcc", "Subject", "Date":
                    h = msg.getheader(header)
                    if h:
                        print("%-8s %s" % (header+":", h))
                body = self.getmessagetext(msg)
                if body:
                    print()
                    nleft = maxlines
                    for part in body:
                        for line in part.splitlines():
                            if prog.search(line):
                                print(line)
                                nleft -= 1
                                if nleft <= 0:
                                    break
                        if nleft <= 0:
                            break
            finally:
                # The file used to stay open; release it for every message.
                fp.close()
            print('-'*70)

    def update(self, args):
        """Index the selected messages of one folder and commit.

        :param args: strings; one starting with "+" names the folder (at
            most one is allowed), all others are message sequences.  The
            current MH context and the sequence "all" are the defaults.
        """
        folder = None
        seqs = []
        for arg in args:
            if not arg.startswith("+"):
                seqs.append(arg)
            elif folder is None:
                folder = arg[1:]
            else:
                print("only one folder at a time")
                return

        folder = folder or self.mh.getcontext()
        seqs = seqs or ['all']

        try:
            f = self.mh.openfolder(folder)
        except mhlib.Error as msg:
            print(msg)
            return

        # Collect the message numbers of all sequences without duplicates.
        wanted = set()
        for seq in seqs:
            try:
                nums = f.parsesequence(seq)
            except mhlib.Error as msg:
                print(msg or "unparsable message sequence: %s" % repr(seq))
                return
            wanted.update(nums)

        self.updatefolder(f, sorted(wanted))
        self.commit()

    def optimize(self, args):
        """Count the words of the given folders and feed them to the lexicon.

        The words are passed to ``lexicon.sourceToWordIds`` ordered by
        descending count, and the (at most) 100 most frequent ones are
        printed.  Folders that cannot be opened are reported and skipped.

        :param args: folder names, each optionally prefixed with "+"
        """
        uniqwords = {}
        for folder in args:
            if folder.startswith("+"):
                folder = folder[1:]
            print("\nOPTIMIZE FOLDER", folder)
            try:
                f = self.mh.openfolder(folder)
            except mhlib.Error as msg:
                print(msg)
                continue
            self.prescan(f, f.listmessages(), uniqwords)
        L = [(count, word) for word, count in uniqwords.items()]
        L.sort()
        L.reverse()
        # Slice instead of indexing 0..99: with fewer than 100 distinct
        # words the old loop raised IndexError before the lexicon was fed.
        for i, entry in enumerate(L[:100]):
            print("%3d. %6d %s" % ((i+1,) + entry))
        self.index.lexicon.sourceToWordIds([word for (count, word) in L])

    def prescan(self, f, msgs, uniqwords):
        """Add the word counts of the given messages to *uniqwords*.

        :param f: folder object providing ``openmessage`` and ``name``
        :param msgs: message numbers to scan
        :param uniqwords: dict mapping word -> count, updated in place
        """
        stages = [Splitter(), CaseNormalizer(), StopWordRemover()]
        for number in msgs:
            print("prescanning", number)
            message = f.openmessage(number)
            words = self.getmessagetext(message, f.name)
            # Run the text through every stage, feeding each the previous output.
            for stage in stages:
                words = stage.process(words)
            for word in words:
                if word in uniqwords:
                    uniqwords[word] += 1
                else:
                    uniqwords[word] = 1

    def bulkupdate(self, args):
        """Index every message of the folders named in ``args``.

        A leading ``+`` on a folder name is stripped.  The special name
        ``ALL`` is replaced, in place in the caller's list, by the names
        returned by ``self.mh.listfolders()``.  Folders that cannot be opened
        are reported and skipped.  After a final commit a one-line summary of
        the lexicon statistics is printed.
        """
        if not args:
            print("No folders specified; use ALL to bulk-index all folders")
            return
        if "ALL" in args:
            i = args.index("ALL")
            args[i:i+1] = self.mh.listfolders()
        for folder in args:
            if folder.startswith("+"):
                folder = folder[1:]
            print("\nFOLDER", folder)
            try:
                f = self.mh.openfolder(folder)
            except mhlib.Error as msg:
                print(msg)
                continue
            self.updatefolder(f, f.listmessages())
            print("Total", len(self.docpaths))
        self.commit()
        lexicon = self.index.lexicon
        # One call keeps the summary on a single line; the former trailing
        # commas inside print(...) had no effect and split it over three.
        print("Indexed", lexicon._nbytes, "bytes and",
              lexicon._nwords, "words;",
              len(lexicon._words), "unique words.")

    def updatefolder(self, f, msgs):
        """Bring the index up to date for messages ``msgs`` of folder ``f``.

        The folder's modification time is recorded in ``self.watchfolders``.
        A message whose recorded time equals its current mtime is reported as
        unchanged; otherwise it gets a document id and is (re)indexed, or
        unindexed when it cannot be opened or yields no text.  Finally every
        known path below the folder whose mtime is 0 is unindexed.
        """
        folder_name = f.name
        self.watchfolders[folder_name] = self.getmtime(folder_name)
        for number in msgs:
            path = "%s/%s" % (folder_name, number)
            known = self.path2docid.get(path, 0)
            if known and self.getmtime(path) == self.doctimes.get(known, 0):
                print("unchanged", known, path)
                continue
            docid = self.newdocid(path)
            try:
                message = f.openmessage(number)
            except IOError:
                print("disappeared", docid, path)
                self.unindexpath(path)
                continue
            text = self.getmessagetext(message, folder_name)
            if text:
                print("indexing", docid, path)
                self.index.index_doc(docid, text)
                self.maycommit()
            else:
                self.unindexpath(path)
        # Remove messages from the folder that no longer exist.  The keys are
        # copied into a list first because unindexpath() deletes entries.
        prefix = folder_name + "/"
        for path in list(self.path2docid.keys(folder_name)):
            if not path.startswith(prefix):
                break
            if self.getmtime(path) == 0:
                self.unindexpath(path)
        print("done.")

    def unindexpath(self, path):
        """Forget the document stored under ``path``, if there is one.

        Removes the entry from ``docpaths``, ``doctimes`` and ``path2docid``,
        asks the text index to unindex the document (a ``KeyError`` from the
        index is printed, not raised) and then calls ``maycommit()``.
        Unknown paths are ignored.
        """
        # ``in`` works for BTrees and plain dicts; dict.has_key() no longer
        # exists on Python 3.
        if path not in self.path2docid:
            return
        docid = self.path2docid[path]
        print("unindexing", docid, path)
        del self.docpaths[docid]
        del self.doctimes[docid]
        del self.path2docid[path]
        try:
            self.index.unindex_doc(docid)
        except KeyError as msg:
            print("KeyError", msg)
        self.maycommit()

    def getmessagetext(self, m, name=None):
        """Return the indexable text chunks of message ``m`` as a list.

        When ``name`` is given, a ``"_folder <name>"`` marker and the
        message's headers are added first.  The body parts follow.  A failure
        while extracting the parts is reported via ``reportexc()`` and
        whatever was collected so far is returned.
        """
        L = []
        if name:
            L.append("_folder " + name) # To restrict search to a folder
            self.getheaders(m, L)
        try:
            self.getmsgparts(m, L, 0)
        except Exception:
            # Best effort for malformed messages.  ``Exception`` rather than
            # a bare ``except`` lets KeyboardInterrupt and SystemExit
            # propagate.
            print("(getmsgparts failed:)")
            reportexc()
        return L

    def getmsgparts(self, m, L, level):
        """Append the plain-text bodies found in message ``m`` to list ``L``.

        ``text/plain`` bodies are appended directly, ``multipart/alternative``
        and ``multipart/mixed`` parts are visited recursively, and a
        ``message/rfc822`` body is parsed as a nested message whose headers
        and parts are added.  Other content types contribute nothing.  The
        content type is printed, indented by ``level``, for everything except
        a top-level plain-text message.
        """
        ctype = m.gettype()
        is_plain = ctype == "text/plain"
        if level or not is_plain:
            print(". "*level + str(ctype))
        if is_plain:
            L.append(m.getbodytext())
            return
        if ctype in ("multipart/alternative", "multipart/mixed"):
            for part in m.getbodyparts():
                self.getmsgparts(part, L, level+1)
            return
        if ctype == "message/rfc822":
            body = StringIO(m.getbodytext())
            nested = mhlib.Message("<folder>", 0, body)
            self.getheaders(nested, L)
            self.getmsgparts(nested, L, level+1)

    def getheaders(self, m, L):
        """Append the address and subject headers of ``m`` to ``L``.

        The non-empty values of from, to, cc, bcc and subject (looked up with
        ``m.get``) are joined with newlines into one string.  Nothing is
        appended when all of them are missing or empty.
        """
        wanted = ("from", "to", "cc", "bcc", "subject")
        present = [value for value in map(m.get, wanted) if value]
        if present:
            L.append("\n".join(present))

    def newdocid(self, path):
        """Return the document id for ``path``, allocating one if needed.

        In both cases the document's recorded time is refreshed from
        ``getmtime(path)``.  A new id is ``self.maxdocid + 1`` and is recorded
        in ``docpaths`` and ``path2docid``.
        """
        docid = self.path2docid.get(path)
        is_new = docid is None
        if is_new:
            docid = self.maxdocid + 1
            self.maxdocid = docid
            self.docpaths[docid] = path
        self.doctimes[docid] = self.getmtime(path)
        if is_new:
            self.path2docid[path] = docid
        return docid

    def getmtime(self, path):
        """Return the integer modification time of ``path``, or 0.

        ``path`` is interpreted relative to ``self.mh.getpath()``.  0 is
        returned when the file cannot be stat'ed.
        """
        full_path = os.path.join(self.mh.getpath(), path)
        try:
            return int(os.stat(full_path)[ST_MTIME])
        except os.error:
            return 0

    def maycommit(self):
        """Count one more change and commit once the limit is reached.

        A ``trans_limit`` of 0 (or less) disables these automatic commits.
        """
        self.trans_count = self.trans_count + 1
        limit = self.trans_limit
        if limit > 0 and self.trans_count >= limit:
            self.commit()

    def commit(self):
        """Commit the current transaction if any changes were counted.

        After committing, the change counter is reset and the pack counter is
        incremented; ``pack()`` is called once the pack counter reaches a
        positive ``pack_limit``.
        """
        if self.trans_count <= 0:
            return
        print("committing...")
        transaction.commit()
        self.trans_count = 0
        self.pack_count += 1
        if self.pack_limit > 0 and self.pack_count >= self.pack_limit:
            self.pack()

    def pack(self):
        """Pack the database if commits happened since the last pack."""
        if self.pack_count <= 0:
            return
        print("packing...")
        self.database.pack()
        self.pack_count = 0
Beispiel #7
0
# Module-level start-up: finish the logging configuration, load the settings
# and open and pack the ZODB database.  ``logger``, ``handler1``,
# ``formatter``, ``APPNAME``, ``SPLASH`` and ``HOMEDIR`` are defined before
# this excerpt.
logger.addHandler(handler1)
# Also log to stdout unless ``sys`` has a ``frozen`` attribute.
# NOTE(review): presumably set when running as a bundled executable -- confirm.
if not hasattr(sys, "frozen"):
    handler2 = logging.StreamHandler(sys.stdout)
    handler2.setFormatter(formatter)
    logger.addHandler(handler2)
log = logging.getLogger(__name__)

# Application settings object.
CFG = QtCore.QSettings('zipta.ru', APPNAME)


# Splash-screen text (Russian): "Optimizing the database".
SPLASH.message(u'Оптимизируем базу')


# File storage under HOMEDIR/db with blobs in HOMEDIR/blobs; packed right
# away with ``days=3``.
storage = FileStorage(os.path.join(HOMEDIR, 'db'), blob_dir=os.path.join(HOMEDIR, 'blobs'))
DB = ZDB(storage)
DB.pack(days=3)


def zodbclose():
    """Close the database and then its storage (registered to run at exit)."""
    for handle in (DB, storage):
        handle.close()

atexit.register(zodbclose)

'''
# пример замены метода
try:
    u = __import__('userdata')
    model.add_item = types.MethodType(u.add_item, model)
except ImportError:
    pass
Beispiel #8
0
			logga('Lug <'+id+'> invio notifiche')
			report.append('\n- - ----> Lug: '+zodb[id].id+' ('+str(zodb[id].numero_errori)+'/'+str(zodb[id].numero_controlli)+') <---- - -\n')
			for rigo in zodb[id].notifiche: report.append(rigo)
			report.append('\n        * Dati DB *')
			report.append('Url : ' + zodb[id].url + '   Email: '+zodb[id].contatto)
			report.append('Dove: ' + zodb[id].regione.capitalize() + ' -> '+zodb[id].provincia + ' -> ' +zodb[id].zona)
	logga('fine invio notifiche')

	if report:
		report.insert(0, 'Spazzino: report del ' +
					  time.strftime('%d/%m/%y', time.gmtime(orario_partenza)) +
					  ' dalle ' +
					  time.strftime('%H:%M', time.gmtime(orario_partenza)) +
					  ' alle ' +
					  time.strftime('%H:%M', time.gmtime(time.time()))
					  )
		try:
			mail = notifiche.email(mittente	= 'Spazzino <*****@*****.**>',
							   destinatario	= ['Gelma <*****@*****.**>'],
							   oggetto 		= 'LugMap: report data (UTC) '+str(datetime.datetime.utcnow()),
							   testo		= report,
							   invia_subito	= True) # Se da Aggiornare, vedi Guida Intergalattica alla LugMap §4.1
		except: # se fallisco stampo a video, così mi arriva come mail via cron
			print '\n'.join(report)

# End of the run: commit pending changes, pack and close the database.
transaction.commit()
db.pack()
db.close()
# Remove the PID file.
# NOTE(review): presumably written at start-up to mark a running instance -- confirm.
os.remove(pidfile)
# 'concluso' is Italian for "finished".
logga('concluso')
Beispiel #9
0
# Script body: fill a database with cross-referencing mappings, then compare
# its size before and after packing.  ``d`` (the database) and ``c`` (an open
# connection) are created before this excerpt.
print(d.getSize())


print('initializing...')
# One root container holding ``container_size`` empty persistent mappings,
# committed in a single transaction.
container = PersistentMapping()
c.root()['container'] = container
container_size = 10000
for i in range(container_size):
    container[i] = PersistentMapping()
transaction.commit()

print('generating transactions...')
# 100 transactions; each stores, in 100 randomly chosen mappings, a reference
# to another randomly chosen mapping under the transaction number.
for trans in range(100):
    print(trans)
    # Lazy generator: each source index is drawn just before it is used, so
    # the draws interleave with the target draws below.
    sources = (random.randint(0, container_size - 1) for j in range(100))
    for source in sources:
        obj = container[source]
        obj[trans] = container[random.randint(0, container_size - 1)]
    transaction.commit()

# Size after all transactions, before packing.
print('size:')
print(d.getSize())

print('packing...')
d.pack()

# Size after packing.
print('size:')
print(d.getSize())

d.close()
class PersistentInterfaceTest(util.TestCase):
    """Tests for ``PersistentInterface`` stored in a file-backed ZODB.

    Each test gets a fresh database in ``PersistentInterfaceTest.fs`` whose
    root holds a ``ManagedRegistry`` under the key ``"registry"``.
    """

    def setUp(self):
        super(PersistentInterfaceTest, self).setUp()
        util.setUp(self)
        self.db = DB(FileStorage('PersistentInterfaceTest.fs'))
        self.conn = self.db.open()
        self.root = self.conn.root()
        self.registry = ManagedRegistry()
        self.root["registry"] = self.registry
        transaction.commit()

    def tearDown(self):
        transaction.abort() # just in case
        self.conn.close()
        self.db.close()
        util.tearDown(self)

    def test_creation(self):
        class IFoo(PersistentInterface):
            pass

        class Foo(object):
            implements(IFoo)

        self.assertTrue(IFoo.providedBy(Foo()))
        self.assertEqual(IFoo._p_oid, None)

    def test_patch(self):
        self.registry.newModule("imodule", code)
        transaction.commit()
        imodule = self.registry.findModule("imodule")

        # test for a pickling bug
        self.assertEqual(imodule.Foo.__implemented__, imodule.IFoo)

        self.assertTrue(imodule.IFoo.providedBy(imodule.aFoo))
        # the conversion should not affect Interface
        self.assertTrue(imodule.Interface is Interface)

    def test___hash___no_jar(self):
        class IFoo(PersistentInterface):
            pass
        self.assertEqual(hash(IFoo), hash((None, None)))

    def test___hash___w_jar(self):
        self.registry.newModule("imodule", code)
        transaction.commit()
        imodule = self.registry.findModule("imodule")
        self.assertEqual(hash(imodule.IFoo),
                         hash((self.conn, imodule.IFoo._p_oid)))

    # The comparison tests below spell out ``==`` / ``!=`` on purpose and wrap
    # the result in assertTrue/assertFalse, so the operator under test is the
    # one actually evaluated.

    def test___eq___no_jar(self):
        class IFoo(PersistentInterface):
            pass
        class IBar(PersistentInterface):
            pass
        self.assertTrue(IFoo == IFoo)
        self.assertFalse(IFoo == IBar)

    def test___eq___w_jar(self):
        class IFoo(PersistentInterface):
            pass
        self.registry.newModule("imodule", code)
        transaction.commit()
        imodule = self.registry.findModule("imodule")
        self.assertTrue(imodule.IFoo == imodule.IFoo) # Don't use assertEqual
        self.assertFalse(imodule.IFoo == imodule.ISpam)
        self.assertFalse(imodule.IFoo == IFoo)

    def test___ne___no_jar(self):
        class IFoo(PersistentInterface):
            pass
        class IBar(PersistentInterface):
            pass
        self.assertFalse(IFoo != IFoo)
        self.assertTrue(IFoo != IBar)

    def test___ne___w_jar(self):
        class IFoo(PersistentInterface):
            pass
        self.registry.newModule("imodule", code)
        transaction.commit()
        imodule = self.registry.findModule("imodule")
        self.assertFalse(imodule.IFoo != imodule.IFoo) # Don't use assertNotEqual
        self.assertTrue(imodule.IFoo != imodule.ISpam)
        self.assertTrue(imodule.IFoo != IFoo)

    def test_provides(self):
        """Provides are persistent."""

        self.registry.newModule("barmodule", bar_code)
        barmodule = self.registry.findModule("barmodule")

        bar = Bar()
        directlyProvides(bar, barmodule.IBar)
        self.root['bar'] = bar
        self.assertTrue(barmodule.IBar.providedBy(bar))

        bah = Bar()
        directlyProvides(bah, barmodule.IBah)
        self.root['bah'] = bah
        self.assertTrue(barmodule.IBah.providedBy(bah))

        blah = Bar()
        directlyProvides(blah, barmodule.IBlah)
        self.root['blah'] = blah
        self.assertTrue(barmodule.IBlah.providedBy(blah))

        # Update the code to make sure everything works on update
        self.registry.updateModule('barmodule',
                                   bar_code + '\nfoo = 1')

        transaction.commit()

        # Reopen the storage through a second DB object so the objects are
        # loaded from disk again.
        self.db.close()
        db = DB(FileStorage('PersistentInterfaceTest.fs'))
        try:
            root = db.open().root()

            barmodule = root['registry'].findModule("barmodule")

            bar = root['bar']
            self.assertTrue(barmodule.IBar.providedBy(bar))

            bah = root['bah']
            self.assertTrue(barmodule.IBah.providedBy(bah))

            blah = root['blah']
            self.assertTrue(barmodule.IBlah.providedBy(blah))
        finally:
            # Close even when an assertion fails, so the storage file is not
            # left open for the following tests.
            db.close()

    def test_persistentWeakref(self):
        """Verify interaction of declaration weak refs with ZODB

        Weak references to persistent objects don't remain after ZODB
        pack and garbage collection."""

        bar = self.root['bar'] = Bar()
        self.registry.newModule("barmodule", bar_code)
        barmodule = self.registry.findModule("barmodule")
        # list() makes the comparison valid for key views as well as lists.
        self.assertEqual(list(barmodule.IBar.dependents.keys()), [])
        directlyProvides(bar, barmodule.IBar)
        self.assertEqual(len(barmodule.IBar.dependents), 1)

        transaction.commit()
        del bar
        del self.root['bar']
        self.db.pack()
        transaction.commit()
        collect()

        root = self.db.open().root()
        barmodule = root['registry'].findModule("barmodule")
        self.assertEqual(list(barmodule.IBar.dependents.keys()), [])

    def test_persistentProvides(self):
        """Verify that provideInterface works."""

        self.registry.newModule("barmodule", provide_iface_code)
        barmodule = self.registry.findModule("barmodule")
        self.assertTrue(IBarInterface.providedBy(barmodule.IBar))

        self.registry.updateModule('barmodule',
                                   provide_iface_code + '\nfoo = 1')
        transaction.commit()
        barmodule = self.registry.findModule("barmodule")
        self.assertTrue(IBarInterface.providedBy(barmodule.IBar))
Beispiel #11
0
	for id in sorted(zodb.keys()):
		if zodb[id].notifiche:
			logga('BusinessMap <'+id+'> invio notifiche')
			report.append('\n- - ----> BusinessMap: '+zodb[id].id+' ('+str(zodb[id].numero_errori)+'/'+str(zodb[id].numero_controlli)+') <---- - -\n')
			for rigo in zodb[id].notifiche: report.append(rigo)
			report.append('\n        * Dati DB *')
			report.append('Url : ' + zodb[id].url + '   Email: '+zodb[id].contatto)
			report.append('Dove: ' + zodb[id].regione.capitalize() + ' -> '+zodb[id].provincia + ' -> ' +zodb[id].zona)
	logga('fine invio notifiche')

	if report:
		report.insert(0, 'BusinessMap: report del ' +
					  time.strftime('%d/%m/%y', time.gmtime(orario_partenza)) +
					  ' dalle ' +
					  time.strftime('%H:%M', time.gmtime(orario_partenza)) +
					  ' alle ' +
					  time.strftime('%H:%M', time.gmtime(time.time()))
					  )

		mail = notifiche.email(mittente	= 'BusinessMap <*****@*****.**>',
					destinatario	= ['Paolo Foletto <*****@*****.**>','*****@*****.**'],
					oggetto 		= 'BusinessMap: report data (UTC) '+str(datetime.datetime.utcnow()),
					testo		= report,
					invia_subito	= True)

# End of the run: commit pending changes, pack and close the database.
transaction.commit()
db.pack()
db.close()
# Remove the PID file.
# NOTE(review): presumably written at start-up to mark a running instance -- confirm.
os.remove(pidfile)
# 'concluso' is Italian for "finished".
logga('concluso')
Beispiel #12
0
# Script body: fill a database with cross-referencing mappings, then compare
# its size before and after packing.  ``d`` (the database) and ``c`` (an open
# connection) are created before this excerpt.
#
# Every print uses the single-argument call form, which produces the same
# output on Python 2 and also parses on Python 3.
print(d.getSize())

# ``if 1:`` is a manual switch: change it to ``if 0:`` to skip data generation.
if 1:
    print('initializing...')
    # One root container holding ``container_size`` empty persistent mappings.
    container = PersistentMapping()
    c.root()['container'] = container
    container_size = 10000
    for i in range(container_size):
        container[i] = PersistentMapping()
    transaction.commit()

    print('generating transactions...')
    # 100 transactions; each stores, in 100 randomly chosen mappings, a
    # reference to another randomly chosen mapping.
    for trans in range(100):
        print(trans)
        sources = (random.randint(0, container_size - 1) for j in range(100))
        for source in sources:
            obj = container[source]
            obj[trans] = container[random.randint(0, container_size - 1)]
        transaction.commit()

    print('size:')
    print(d.getSize())

print('packing...')
d.pack()

print('size:')
print(d.getSize())

d.close()
Beispiel #13
0
class DBConnection:
    """Connection wrapper for the room-booking ZODB database.

    When ``DALManager.usesMainIndicoDB()`` is true the connection is borrowed
    from ``DBMgr`` and the disconnect/commit/rollback/sync/pack methods do
    nothing; otherwise they act on this object's own database.
    """

    def __init__(self, minfo):
        """Create the client storage and database from the settings in ``minfo``."""
        self.connection = None
        self.root = None

        self.storage = ClientStorage.ClientStorage(
            minfo.getRoomBookingDBConnectionParams(),
            username=minfo.getRoomBookingDBUserName(),
            password=minfo.getRoomBookingDBPassword(),
            realm=minfo.getRoomBookingDBRealm())
        self.db = DB(self.storage)

    def connect(self):
        """Open a connection and load its root, unless already connected."""
        if not self.isConnected():
            if DALManager.usesMainIndicoDB():
                self.connection = DBMgr.getInstance().getDBConnection()
            else:
                self.connection = self.db.open()
            self.root = self.connection.root()

    def isConnected(self):
        """Return True when a connection object is currently held."""
        return bool(self.connection)

    def getRoot(self, name=""):
        """Return the root object, or the branch ``name`` below it.

        A missing or empty branch with one of the known names is created
        (and replaces an empty one); unknown names yield ``None``.

        Raises:
            MaKaCError: a branch is requested while no root is loaded.
        """
        if name == "":
            return self.root
        if self.root is None:
            raise MaKaCError("Cannot connect to the room booking database")
        if name in self.root and self.root[name]:
            return self.root[name]
        # create the branch
        if name in ["Rooms", "Reservations"]:
            self.root[name] = IOBTree()
        elif name in ["RoomReservationsIndex", "UserReservationsIndex",
                      "DayReservationsIndex", "RoomDayReservationsIndex"]:
            self.root[name] = OOBTree()
        elif name in ["EquipmentList", "CustomAttributesList"]:
            self.root[name] = {}
        else:
            return None
        return self.root[name]

    def disconnect(self):
        """Close the connection held by this object and forget its root."""
        if DALManager.usesMainIndicoDB():
            return
        if self.isConnected():
            self.connection.close()
            self.root = None
            self.connection = None

    def commit(self):
        """Commit the connection's current transaction."""
        if DALManager.usesMainIndicoDB():
            return
        if self.isConnected():
            self.connection.transaction_manager.get().commit()

    def rollback(self):
        """Abort the connection's current transaction."""
        if DALManager.usesMainIndicoDB():
            return
        if self.isConnected():
            self.connection.transaction_manager.get().abort()

    def sync(self):
        """Call ``sync()`` on the connection."""
        if DALManager.usesMainIndicoDB():
            return
        if self.isConnected():
            self.connection.sync()

    def pack(self, days=1):
        """Pack the database, passing ``days`` through to ``DB.pack``."""
        if DALManager.usesMainIndicoDB():
            return
        self.db.pack(days=days)

    def transaction(self):
        """
        Calls the ZODB context manager for the connection
        """
        return self.db.transaction()
Beispiel #14
0
class DBConnection:
    """Access point for the room-booking ZODB database.

    If ``DALManager.usesMainIndicoDB()`` is true, the connection comes from
    ``DBMgr`` and disconnect/commit/rollback/sync/pack are no-ops here;
    otherwise they operate on the database owned by this object.
    """

    def __init__(self, minfo):
        """Build the client storage and database from ``minfo``'s settings."""
        self.connection = None
        self.root = None

        self.storage = ClientStorage.ClientStorage(
            minfo.getRoomBookingDBConnectionParams(),
            username=minfo.getRoomBookingDBUserName(),
            password=minfo.getRoomBookingDBPassword(),
            realm=minfo.getRoomBookingDBRealm())
        self.db = DB(self.storage)

    def connect(self):
        """Obtain a connection and its root; does nothing when connected."""
        if not self.isConnected():
            if DALManager.usesMainIndicoDB():
                self.connection = DBMgr.getInstance().getDBConnection()
            else:
                self.connection = self.db.open()
            self.root = self.connection.root()

    def isConnected(self):
        """Tell whether a connection object is currently held."""
        return bool(self.connection)

    def getRoot(self, name=""):
        """Return the root (empty ``name``) or the named branch below it.

        Known branch names are created on demand; a branch that exists but is
        empty is replaced by a fresh one.  Unknown names return ``None``.

        Raises:
            MaKaCError: a branch is requested while no root is loaded.
        """
        if name == "":
            return self.root
        if self.root is None:
            raise MaKaCError("Cannot connect to the room booking database")
        if name in self.root and self.root[name]:
            return self.root[name]
        # create the branch
        if name in ["Rooms", "Reservations"]:
            self.root[name] = IOBTree()
        elif name in [
                "RoomReservationsIndex", "UserReservationsIndex",
                "DayReservationsIndex", "RoomDayReservationsIndex"
        ]:
            self.root[name] = OOBTree()
        elif name in ["EquipmentList", "CustomAttributesList"]:
            self.root[name] = {}
        else:
            return None
        return self.root[name]

    def disconnect(self):
        """Close the held connection and drop the cached root."""
        if DALManager.usesMainIndicoDB():
            return
        if self.isConnected():
            self.connection.close()
            self.root = None
            self.connection = None

    def commit(self):
        """Commit the transaction of the held connection."""
        if DALManager.usesMainIndicoDB():
            return
        if self.isConnected():
            self.connection.transaction_manager.get().commit()

    def rollback(self):
        """Abort the transaction of the held connection."""
        if DALManager.usesMainIndicoDB():
            return
        if self.isConnected():
            self.connection.transaction_manager.get().abort()

    def sync(self):
        """Call ``sync()`` on the held connection."""
        if DALManager.usesMainIndicoDB():
            return
        if self.isConnected():
            self.connection.sync()

    def pack(self, days=1):
        """Pack the database; ``days`` is forwarded to ``DB.pack``."""
        if DALManager.usesMainIndicoDB():
            return
        self.db.pack(days=days)

    def transaction(self):
        """
        Calls the ZODB context manager for the connection
        """
        return self.db.transaction()
Beispiel #15
0
class Indexer(object):

    filestorage = database = connection = root = None

    def __init__(self, datafs, writable=0, trans=0, pack=0):
        """Open the index database stored in file ``datafs``.

        ``writable`` opens the storage read-write when true.  ``trans`` and
        ``pack`` are the automatic commit and pack limits.  Root entries that
        are missing (index, docpaths, doctimes, watchfolders) are created,
        and the reverse path -> docid map is rebuilt in memory.
        """
        self.trans_limit, self.pack_limit = trans, pack
        self.trans_count = self.pack_count = 0
        self.stopdict = get_stopdict()
        self.mh = mhlib.MH()
        self.filestorage = FileStorage(datafs, read_only=(not writable))
        self.database = DB(self.filestorage)
        self.connection = self.database.open()
        self.root = self.connection.root()

        def load(key, factory):
            # Fetch a root entry, creating and storing it on first use.
            try:
                return self.root[key]
            except KeyError:
                created = self.root[key] = factory()
                return created

        self.index = load("index", TextIndexWrapper)
        self.docpaths = load("docpaths", IOBTree)
        self.doctimes = load("doctimes", IIBTree)
        self.watchfolders = load("watchfolders", dict)

        # path2docid is not stored; it is derived from docpaths on every start.
        self.path2docid = OIBTree()
        for docid, path in self.docpaths.items():
            self.path2docid[path] = docid
        try:
            self.maxdocid = max(self.docpaths.keys())
        except ValueError:
            # max() of an empty sequence: no documents yet.
            self.maxdocid = 0
        print(len(self.docpaths), "Document ids")
        print(len(self.path2docid), "Pathnames")
        print(self.index.lexicon.length(), "Words")

    def dumpfreqs(self):
        """Print word id, total frequency and word, most frequent first.

        The frequency of a word is the sum of the values stored for its word
        id in the index's ``_wordinfo``.
        """
        lexicon = self.index.lexicon
        index = self.index.index
        assert isinstance(index, OkapiIndex)
        rows = sorted(
            ((sum(index._wordinfo.get(wid, {}).values()),
              wid,
              lexicon.get_word(wid))
             for wid in lexicon.wids()),
            reverse=True)
        for freq, wid, word in rows:
            print("%10d %10d %s" % (wid, freq, word))

    def dumpwids(self):
        """Print word id, total frequency and word, in ``lexicon.wids()`` order."""
        lexicon = self.index.lexicon
        index = self.index.index
        assert isinstance(index, OkapiIndex)
        for wid in lexicon.wids():
            total = sum(index._wordinfo.get(wid, {}).values())
            print("%10d %10d %s" % (wid, total, lexicon.get_word(wid)))

    def dumpwords(self):
        """Print word id, total frequency and word, in ``lexicon.words()`` order."""
        lexicon = self.index.lexicon
        index = self.index.index
        assert isinstance(index, OkapiIndex)
        for word in lexicon.words():
            wid = lexicon.get_wid(word)
            total = sum(index._wordinfo.get(wid, {}).values())
            print("%10d %10d %s" % (wid, total, word))

    def close(self):
        """Close connection, database and storage, in that order.

        Each attribute is reset to ``None`` after closing, so calling this
        method again is harmless.
        """
        self.root = None
        for attr in ("connection", "database", "filestorage"):
            resource = getattr(self, attr)
            if resource is None:
                continue
            resource.close()
            setattr(self, attr, None)

    def interact(self, nbest=NBEST, maxlines=MAXLINES):
        """Run an interactive query prompt until end of input.

        A non-empty line starts a new query and shows its first ``nbest``
        hits.  An empty line shows the next ``nbest`` hits of the current
        query.  A line starting with ``/`` is passed to ``specialcommand``.
        ``maxlines`` is forwarded to ``formatresults``.  Query errors are
        reported with ``reportexc()`` and the loop continues.
        """
        try:
            import readline  # imported only for its effect on the prompt
        except ImportError:
            pass
        # raw_input() exists only on Python 2; Python 3 calls it input().
        try:
            read_line = raw_input
        except NameError:
            read_line = input
        text = ""
        top = 0
        results = []
        while 1:
            try:
                line = read_line("Query: ")
            except EOFError:
                print("\nBye.")
                break
            line = line.strip()
            if line.startswith("/"):
                # ``top`` already points past the page shown last, so
                # ``top - nbest`` is the first hit of that page.
                self.specialcommand(line, results, top - nbest)
                continue
            if line:
                text = line
                top = 0
            else:
                if not text:
                    continue
            try:
                results, n = self.timequery(text, top + nbest)
            except Exception:
                # KeyboardInterrupt and SystemExit are not Exceptions and
                # therefore still leave the loop.
                reportexc()
                text = ""
                continue
            if len(results) <= top:
                if not n:
                    print("No hits for %r." % text)
                else:
                    print("No more hits for %r." % text)
                text = ""
                continue
            print("[Results %d-%d from %d" % (top+1, min(n, top+nbest), n), end=' ')
            print("for query %s]" % repr(text))
            self.formatresults(text, results, maxlines, top, top+nbest)
            top += nbest

    def specialcommand(self, line, results, first):
        """Show one hit of ``results`` with the MH ``show`` command.

        ``line`` is ``/`` optionally followed by a 1-based hit number; a bare
        ``/`` selects index ``first``.  Invalid numbers print ``Huh?``,
        numbers outside ``results`` print ``Out of range``.  With ``DISPLAY``
        set the message is shown in an xterm through ``less``.
        """
        assert line.startswith("/")
        line = line[1:]
        if not line:
            n = first
        else:
            try:
                n = int(line) - 1
            except ValueError:
                # Only a malformed number is expected here; a bare ``except``
                # would also swallow KeyboardInterrupt.
                print("Huh?")
                return
        if n < 0 or n >= len(results):
            print("Out of range")
            return
        docid, score = results[n]
        path = self.docpaths[docid]
        i = path.rfind("/")
        assert i > 0
        folder = path[:i]
        n = path[i+1:]
        # NOTE(review): the command goes through the shell unquoted; folder
        # names containing shell metacharacters would be interpreted.
        cmd = "show +%s %s" % (folder, n)
        if os.getenv("DISPLAY"):
            os.system("xterm -e  sh -c '%s | less' &" % cmd)
        else:
            os.system(cmd)

    def query(self, text, nbest=NBEST, maxlines=MAXLINES):
        """Run one query and print up to ``nbest`` formatted hits."""
        results, n = self.timequery(text, nbest)
        if n:
            print("[Results 1-%d from %d]" % (len(results), n))
            self.formatresults(text, results, maxlines)
        else:
            print("No hits for %r." % text)

    def timequery(self, text, nbest):
        """Run ``self.index.query(text, 0, nbest)`` and print how long it took.

        Returns the ``(results, n)`` pair produced by the index query.
        """
        # time.clock() was removed in Python 3.8; use process_time() where it
        # exists and fall back to clock() on older interpreters.
        cpu_clock = getattr(time, "process_time", None) or time.clock
        t0 = time.time()
        c0 = cpu_clock()
        results, n = self.index.query(text, 0, nbest)
        t1 = time.time()
        c1 = cpu_clock()
        print("[Query time: %.3f real, %.3f user]" % (t1-t0, c1-c0))
        return results, n

    def formatresults(self, text, results, maxlines=MAXLINES,
                      lo=0, hi=None):
        """Print the hits ``results[lo:hi]`` with their matching body lines.

        Args:
            text: the query; its non-stop words (a trailing ``*`` acts as a
                wildcard) are used to pick the body lines to show.
            results: sequence of ``(docid, score)`` pairs.
            maxlines: approximate cap on matching lines printed per hit.
            lo, hi: slice bounds into ``results``; ``hi=None`` means "to the
                end".

        For every hit the rank, score and path are printed, followed by
        selected headers and the matching lines.  Files that cannot be opened
        are reported and skipped.
        """
        stopwords = self.stopdict
        words = [w for w in re.findall(r"\w+\*?", text.lower())
                 if w not in stopwords]
        pattern = r"\b(" + "|".join(words) + r")\b"
        pattern = pattern.replace("*", ".*") # glob -> re syntax
        prog = re.compile(pattern, re.IGNORECASE)
        print('='*70)
        rank = lo
        for docid, score in results[lo:hi]:
            rank += 1
            path = self.docpaths[docid]
            score *= 100.0
            print("Rank:    %d   Score: %d%%   File: %s" % (rank, score, path))
            path = os.path.join(self.mh.getpath(), path)
            try:
                fp = open(path)
            except (IOError, OSError) as msg:
                print("Can't open:", msg)
                continue
            try:
                msg = mhlib.Message("<folder>", 0, fp)
                for header in "From", "To", "Cc", "Bcc", "Subject", "Date":
                    h = msg.getheader(header)
                    if h:
                        print("%-8s %s" % (header+":", h))
                parts = self.getmessagetext(msg)
                if parts:
                    print()
                    nleft = maxlines
                    for part in parts:
                        for line in part.splitlines():
                            if prog.search(line):
                                print(line)
                                nleft -= 1
                                if nleft <= 0:
                                    break
                        if nleft <= 0:
                            break
            finally:
                # The message file used to stay open until garbage collected.
                fp.close()
            print('-'*70)

    def update(self, args):
        """Re-index selected messages of one MH folder.

        ``args`` holds at most one ``+folder`` argument plus any number of
        message-sequence specifications for the folder's ``parsesequence``.
        The current MH context is used when no folder is given, and ``'all'``
        when no sequence is given.  Errors are printed and end the call
        early; nothing is raised.
        """
        folder = None
        seqs = []

        for arg in args:
            if arg.startswith("+"):
                if folder is None:
                    folder = arg[1:]
                else:
                    print("only one folder at a time")
                    return
            else:
                seqs.append(arg)

        if not folder:
            folder = self.mh.getcontext()
        if not seqs:
            seqs = ['all']

        try:
            f = self.mh.openfolder(folder)
        except mhlib.Error as msg:
            print(msg)
            return

        # Gather the numbers in a set: duplicates from overlapping sequences
        # collapse and the builtin ``dict`` is not shadowed.
        wanted = set()
        for seq in seqs:
            try:
                wanted.update(f.parsesequence(seq))
            except mhlib.Error as msg:
                print(msg or "unparsable message sequence: %s" % repr(seq))
                return
        # ``dict.keys()`` has no ``.sort()`` on Python 3; sorted() works on
        # both major versions.
        msgs = sorted(wanted)

        self.updatefolder(f, msgs)
        self.commit()

    def optimize(self, args):
        """Feed the lexicon every word of the given folders by frequency.

        The folders named in ``args`` (leading ``+`` optional) are prescanned
        to count words.  Up to 100 of the most frequent words are printed and
        the full list, most frequent first, is handed to the lexicon's
        ``sourceToWordIds``.  Folders that cannot be opened are reported and
        skipped.
        """
        uniqwords = {}
        for folder in args:
            if folder.startswith("+"):
                folder = folder[1:]
            print("\nOPTIMIZE FOLDER", folder)
            try:
                f = self.mh.openfolder(folder)
            except mhlib.Error as msg:
                print(msg)
                continue
            self.prescan(f, f.listmessages(), uniqwords)
        by_count = sorted(((uniqwords[word], word) for word in uniqwords),
                          reverse=True)
        # Printing ``by_count[:100]`` instead of indexing 0..99 avoids the
        # IndexError raised when fewer than 100 distinct words were seen.
        for position, pair in enumerate(by_count[:100], 1):
            print("%3d. %6d %s" % ((position,) + pair))
        self.index.lexicon.sourceToWordIds([word for (count, word) in by_count])

    def prescan(self, f, msgs, uniqwords):
        """Count the words of messages ``msgs`` of folder ``f``.

        The text of each message passes through a splitter, a case normalizer
        and a stop-word remover; the count of every remaining word is then
        incremented in ``uniqwords``, which is updated in place.
        """
        filters = [Splitter(), CaseNormalizer(), StopWordRemover()]
        for msgno in msgs:
            print("prescanning", msgno)
            message = f.openmessage(msgno)
            tokens = self.getmessagetext(message, f.name)
            for one_filter in filters:
                tokens = one_filter.process(tokens)
            for token in tokens:
                uniqwords[token] = uniqwords.get(token, 0) + 1

    def bulkupdate(self, args):
        """Index all messages of the folders named in ``args``.

        A leading ``+`` is stripped from folder names and ``ALL`` is replaced
        (in the caller's list) by ``self.mh.listfolders()``.  Unopenable
        folders are reported and skipped.  A final commit is followed by a
        one-line summary of the lexicon counters.
        """
        if not args:
            print("No folders specified; use ALL to bulk-index all folders")
            return
        if "ALL" in args:
            pos = args.index("ALL")
            args[pos:pos+1] = self.mh.listfolders()
        for arg in args:
            name = arg[1:] if arg.startswith("+") else arg
            print("\nFOLDER", name)
            try:
                f = self.mh.openfolder(name)
            except mhlib.Error as msg:
                print(msg)
                continue
            self.updatefolder(f, f.listmessages())
            print("Total", len(self.docpaths))
        self.commit()
        lexicon = self.index.lexicon
        print("Indexed", lexicon._nbytes, "bytes and",
              lexicon._nwords, "words;",
              len(lexicon._words), "unique words.")

    def updatefolder(self, f, msgs):
        """Synchronize the index with messages ``msgs`` of folder ``f``.

        Records the folder's mtime in ``self.watchfolders``.  Messages whose
        stored time matches their current mtime are reported as unchanged;
        all others are assigned a document id and indexed, or unindexed if
        they cannot be opened or produce no text.  Afterwards, known paths
        under the folder whose mtime is 0 are unindexed.
        """
        name = f.name
        self.watchfolders[name] = self.getmtime(name)
        for msgno in msgs:
            path = "%s/%s" % (name, msgno)
            previous = self.path2docid.get(path, 0)
            if previous and self.getmtime(path) == self.doctimes.get(previous, 0):
                print("unchanged", previous, path)
                continue
            docid = self.newdocid(path)
            try:
                message = f.openmessage(msgno)
            except IOError:
                print("disappeared", docid, path)
                self.unindexpath(path)
                continue
            chunks = self.getmessagetext(message, name)
            if chunks:
                print("indexing", docid, path)
                self.index.index_doc(docid, chunks)
                self.maycommit()
            else:
                self.unindexpath(path)
        # Remove messages from the folder that no longer exist.  Iterate over
        # a copy of the keys because unindexpath() deletes entries.
        folder_prefix = name + "/"
        for known_path in list(self.path2docid.keys(name)):
            if not known_path.startswith(folder_prefix):
                break
            if self.getmtime(known_path) == 0:
                self.unindexpath(known_path)
        print("done.")

    def unindexpath(self, path):
        """Forget *path*: drop its bookkeeping entries and its index data.

        Unknown paths are ignored.  For a known path the docid is removed
        from ``docpaths``, ``doctimes`` and ``path2docid``, then from the text
        index (a KeyError raised there is only reported), and finally
        ``maycommit()`` is called.
        """
        if path not in self.path2docid:
            return
        doc_id = self.path2docid[path]
        print("unindexing", doc_id, path)
        del self.docpaths[doc_id]
        del self.doctimes[doc_id]
        del self.path2docid[path]
        try:
            self.index.unindex_doc(doc_id)
        except KeyError as err:
            # The index did not know this docid; report it and carry on.
            print("KeyError", err)
        self.maycommit()

    def getmessagetext(self, m, name=None):
        """Return the indexable text of message *m* as a list of strings.

        When *name* (a folder name) is given, the list starts with the marker
        ``"_folder <name>"`` so that searches can be restricted to a folder,
        followed by whatever ``getheaders()`` contributes.  The body parts
        gathered by ``getmsgparts()`` come last.

        Extraction is best effort: an ordinary exception raised while walking
        the parts is reported through ``reportexc()`` and whatever was
        collected up to that point is returned.
        """
        L = []
        if name:
            L.append("_folder " + name) # To restrict search to a folder
            self.getheaders(m, L)
        try:
            self.getmsgparts(m, L, 0)
        except Exception:
            # Only ordinary errors are swallowed.  KeyboardInterrupt,
            # SystemExit and GeneratorExit derive from BaseException and
            # therefore propagate to the caller untouched.
            print("(getmsgparts failed:)")
            reportexc()
        return L

    def getmsgparts(self, m, L, level):
        """Recursively collect the plain-text content of *m* into *L*.

        *level* is the nesting depth; it only controls the indentation of the
        content-type trace that is printed for every part except a top-level
        text/plain message.

        text/plain bodies are appended to *L*.  multipart/alternative and
        multipart/mixed messages are descended into.  An embedded
        message/rfc822 is parsed as a message of its own, contributing its
        headers and then its parts.  Every other content type is ignored.
        """
        content_type = m.gettype()
        is_plain = content_type == "text/plain"
        if level or not is_plain:
            print(". "*level + str(content_type))

        if is_plain:
            L.append(m.getbodytext())
            return

        if content_type in ("multipart/alternative", "multipart/mixed"):
            for part in m.getbodyparts():
                self.getmsgparts(part, L, level+1)
            return

        if content_type == "message/rfc822":
            # Re-parse the body as a standalone message.
            body_file = StringIO(m.getbodytext())
            embedded = mhlib.Message("<folder>", 0, body_file)
            self.getheaders(embedded, L)
            self.getmsgparts(embedded, L, level+1)

    def getheaders(self, m, L):
        """Append the addressing and subject headers of *m* to *L*.

        The non-empty values of the from, to, cc, bcc and subject headers (in
        that order) are joined with newlines into a single string.  Nothing
        is appended when none of them has a value.
        """
        wanted = ("from", "to", "cc", "bcc", "subject")
        found = [value for value in (m.get(key) for key in wanted) if value]
        if found:
            L.append("\n".join(found))

    def newdocid(self, path):
        """Return the docid for *path*, allocating one if necessary.

        A path that is already registered keeps its docid.  A new path gets
        ``maxdocid + 1`` and is entered into ``docpaths`` and ``path2docid``.
        In both cases the modification time stored in ``doctimes`` is
        refreshed from ``getmtime(path)``.
        """
        docid = self.path2docid.get(path)
        if docid is None:
            # First time this path is seen: hand out the next free id.
            docid = self.maxdocid + 1
            self.maxdocid = docid
            self.docpaths[docid] = path
            self.doctimes[docid] = self.getmtime(path)
            self.path2docid[path] = docid
        else:
            self.doctimes[docid] = self.getmtime(path)
        return docid

    def getmtime(self, path):
        """Return the modification time of *path* as an int, or 0.

        *path* is taken relative to ``self.mh.getpath()``.  0 is returned
        when the file cannot be stat'ed.
        """
        full_path = os.path.join(self.mh.getpath(), path)
        try:
            info = os.stat(full_path)
        except os.error:
            return 0
        else:
            return int(info[ST_MTIME])

    def maycommit(self):
        """Count one more pending change and commit once the limit is hit.

        A ``trans_limit`` of zero or less disables the automatic commit.
        """
        self.trans_count += 1
        pending = self.trans_count
        limit = self.trans_limit
        if pending >= limit and limit > 0:
            self.commit()

    def commit(self):
        """Commit the current transaction if any changes are pending.

        Does nothing when ``trans_count`` is not positive.  Otherwise the
        transaction is committed, ``trans_count`` is reset, ``pack_count`` is
        incremented, and ``pack()`` is called once ``pack_count`` has reached
        a positive ``pack_limit``.
        """
        if self.trans_count <= 0:
            return
        print("committing...")
        transaction.commit()
        self.trans_count = 0
        self.pack_count += 1
        commits_done = self.pack_count
        limit = self.pack_limit
        if commits_done >= limit and limit > 0:
            self.pack()

    def pack(self):
        """Pack the database if at least one commit happened since the last pack.

        Resets ``pack_count`` to zero afterwards; a no-op when ``pack_count``
        is not positive.
        """
        if self.pack_count <= 0:
            return
        print("packing...")
        self.database.pack()
        self.pack_count = 0