def scanner(root):
    """Run one scan of ``root`` and record its progress in ``SysInfo``.

    Steps, in order:

    1. Delete ``filetag`` rows whose ``file_id`` is not present in
       ``fileinfo``.
    2. Store the current process id under the ``pid`` key.
    3. Decide whether the scan is forced: it is when a ``last_scan``
       timestamp exists and is older than ``config['scan_interval']``
       seconds.
    4. Walk ``root`` with ``scan_dir`` and derive ``speed`` from the number
       of ``FileInfo`` rows that carry a pid and the elapsed time.
    5. When forced, run ``clean_notexists``.

    Directory info is refreshed and ``clean_scanner`` is called even when the
    scan raises; the ``last_scan`` timestamp and the 100 progress mark are
    written last, in every case.

    Args:
        root: Directory to scan; passed unchanged to ``scan_dir``.
    """
    speed = 0
    timer = datetime.now()
    try:
        pid = os.getpid()
        try:
            with DBSession() as db:
                sql = """DELETE FROM filetag WHERE file_id NOT IN (SELECT ID FROM fileinfo)"""
                with SQLResult(db.orm, sql) as res:
                    if res.rowcount > 0:
                        logger.info("Delete {} tags.".format(res.rowcount))

                r = db.orm.query(SysInfo).filter(SysInfo.name == "pid").first()
                if r:
                    r.value = str(pid)
                else:
                    db.orm.add(SysInfo(name="pid", value=str(pid)))

                r = db.orm.query(SysInfo).filter(
                    SysInfo.name == 'last_scan').first()
                force = False
                if r:
                    last_scan = datetime.strptime(r.value, "%Y-%m-%d %H:%M:%S")
                    # total_seconds() counts whole days as well; the
                    # ``seconds`` attribute alone wraps every 24 hours and
                    # would make an old scan look recent.
                    age = (datetime.now() - last_scan).total_seconds()
                    force = age > int(config['scan_interval'])
                logger.warning("force: {}".format(force))

            set_progress(1, "", speed)
            scan_dir(pid, root, force)

            with DBSession(auto_commit=True) as db:
                # SQLAlchemy renders ``!= None`` as ``IS NOT NULL``.
                count = db.orm.query(
                    FileInfo.id).filter(FileInfo.pid != None).count()  # noqa: E711
            elapsed = get_elapsed(timer)
            # NOTE(review): get_elapsed is defined elsewhere; guard against a
            # zero result so a very short scan cannot raise ZeroDivisionError.
            speed = int(count / elapsed) if elapsed else 0
            if force:
                clean_notexists()
        finally:
            logger.info("Updating directories info...")
            timer = datetime.now()
            set_progress(90, "", speed)
            with DBSession() as db:
                total_size, _ = update_dirinfo(db.orm, "")
            logger.info("Total size: {}".format(total_size))
            logger.info("Elapsed time: {}".format(
                int((datetime.now() - timer).total_seconds())))
            clean_scanner(pid)
    finally:
        dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        with DBSession() as db:
            r = db.orm.query(SysInfo).filter(
                SysInfo.name == "last_scan").first()
            if r:
                r.value = dt
            else:
                db.orm.add(SysInfo(name="last_scan", value=dt))
        set_progress(100, "", speed)
def clean_scanner(pid):
    """Release everything claimed by scanner ``pid``.

    Sets ``pid`` to ``None`` on each ``FileInfo`` row whose pid equals
    ``pid``, then blanks the value of the ``pid`` entry in ``SysInfo`` when
    such an entry exists.

    Args:
        pid: Process id whose claims should be cleared.
    """
    with DBSession() as db:
        session = db.orm
        owned = session.query(FileInfo).filter(FileInfo.pid == pid).all()
        for record in owned:
            record.pid = None

        marker = session.query(SysInfo).filter(SysInfo.name == "pid").first()
        if marker:
            marker.value = None
def get_progress():
    """Return the stored progress values as a ``{name: value}`` dict.

    Only the ``progress``, ``cur_path`` and ``speed`` entries of ``SysInfo``
    are read; a name with no stored row is absent from the result.
    """
    wanted = ["progress", "cur_path", "speed"]
    with DBSession(auto_commit=True) as db:
        rows = db.orm.query(SysInfo).filter(SysInfo.name.in_(wanted)).all()
        prog = dict((row.name, row.value) for row in rows)
    return prog
def set_sysinfo(key, value):
    """Create or overwrite the ``SysInfo`` entry named ``key``.

    The session is flushed before the ``DBSession`` block exits.

    Args:
        key: Name of the entry.
        value: Value to store for that entry.
    """
    with DBSession() as db:
        session = db.orm
        entry = session.query(SysInfo).filter(SysInfo.name == key).first()
        if not entry:
            entry = SysInfo(name=key, value=value)
            session.add(entry)
        else:
            entry.value = value
        session.flush()
def clean_notexists():
    """Reconcile ``FileInfo`` rows that no scanner has claimed (pid IS NULL).

    For each such row the size of ``dirname/name`` is looked up with
    ``get_filesize``:

    * a non-negative size is stored on the row and its checksum is cleared;
    * a negative size removes the row together with its tags (presumably
      ``get_filesize`` reports a missing file this way -- confirm against
      its definition).
    """
    with DBSession() as db:
        session = db.orm
        # SQLAlchemy renders ``== None`` as ``IS NULL``.
        unclaimed = session.query(FileInfo).filter(FileInfo.pid == None)  # noqa: E711
        for record in unclaimed.all():
            fullname = os.path.join(record.dirname, record.name)
            size = get_filesize(fullname)
            if size < 0:
                logger.info("not exists: {}".format(fullname))
                delete_tags(session, record.id)
                session.delete(record)
                continue
            logger.warning("missing %s" % fullname)
            record.size = size
            record.checksum = None
def gen_checksum():
    """Compute checksums for records whose size is shared by several files.

    Candidate rows are ``FileInfo`` records that

    * have a size equal to one that occurs more than once among rows of
      type ``'F'`` with a size greater than 0,
    * currently carry a pid, and
    * either have no checksum, or have a non-zero ``quickhash`` that differs
      from the configured ``quick_hash_size``.

    Each candidate is hashed with ``get_filemd5`` outside of any database
    session and the result is written back in a session of its own. A
    result of ``'-'`` deletes the record (presumably the marker for a file
    that could not be read -- confirm against ``get_filemd5``).

    Returns:
        int: Number of candidate records selected, including any that were
        deleted or skipped.
    """
    root = os.path.expanduser(config['work_dir'])
    quickhash = expand_size(config['quick_hash_size'])

    with DBSession() as db:
        sq = db.orm.query(FileInfo.size).filter(
            FileInfo.ftype == 'F',
            FileInfo.size > 0).group_by(FileInfo.size).having(
                func.count(FileInfo.size) > 1).subquery()
        # ``!= None`` / ``== None`` are the SQLAlchemy spellings of
        # IS NOT NULL / IS NULL.
        qry = db.orm.query(FileInfo).join(
            sq, FileInfo.size == sq.c.size).filter(
                FileInfo.pid != None).filter(  # noqa: E711
                    or_(
                        FileInfo.checksum == None,  # noqa: E711
                        and_(FileInfo.quickhash != 0,
                             FileInfo.quickhash != quickhash)))
        # Copy the needed fields into plain dicts so the slow hashing below
        # does not depend on this session.
        rs = [{
            "id": r.id,
            "realname": os.path.realpath(
                os.path.join(root, r.dirname, r.name)),
        } for r in qry.all()]

    for rec in rs:
        checksum = get_filemd5(rec['realname'])
        with DBSession() as db:
            r = get_file(db.orm, rec['id'])
            if r is None:
                # The record disappeared while the file was being hashed;
                # there is nothing left to update.
                continue
            if checksum == '-':
                db.orm.delete(r)
            else:
                r.checksum = checksum
                r.quickhash = quickhash if r.size > quickhash else 0
    return len(rs)
def reset_scanner():
    """Clear pid claims left behind by scanners that are no longer running.

    Returns:
        A "Scanner ... working, please wait..." message as soon as
        ``check_pid`` reports a live pid -- either the one stored under the
        ``pid`` key in ``SysInfo`` or one still attached to ``FileInfo``
        rows. Otherwise ``None``, after the pid column has been reset to
        NULL for every pid found on ``FileInfo`` rows.
    """
    busy_message = "Scanner {} working, please wait..."
    with DBSession() as db:
        session = db.orm
        marker = session.query(SysInfo).filter(SysInfo.name == "pid").first()
        if marker and check_pid(marker.value):
            return busy_message.format(marker.value)

        # SQLAlchemy renders ``!= None`` as ``IS NOT NULL``.
        claimed = session.query(FileInfo.pid).filter(
            FileInfo.pid != None).distinct()  # noqa: E711
        for row in claimed:
            if check_pid(row.pid):
                return busy_message.format(row.pid)
            sql = """UPDATE fileinfo SET pid=null WHERE pid = :pid"""
            with SQLResult(session, sql, pid=row.pid) as res:
                if res.rowcount <= 0:
                    logger.error("Reset scanner {} fail!".format(row.pid))
    return None
def set_progress(progress, cur_path, speed):
    """Store the scan progress values in ``SysInfo``.

    ``progress`` and ``speed`` are saved as the string form of their integer
    value. An argument that is ``None`` is neither written to an existing
    entry nor used to create a new one.

    Args:
        progress: Progress figure, or ``None`` to leave it unchanged.
        cur_path: Path currently being processed, or ``None`` to leave it
            unchanged.
        speed: Scan speed, or ``None`` to leave it unchanged.
    """
    pending = {
        "progress": None if progress is None else str(int(progress)),
        "cur_path": cur_path,
        "speed": None if speed is None else str(int(speed)),
    }
    with DBSession() as db:
        session = db.orm
        existing = session.query(SysInfo).filter(
            SysInfo.name.in_(pending.keys())).all()
        # Entries that already exist are updated in place and removed from
        # ``pending``; whatever remains has no row yet.
        for row in existing:
            new_value = pending.pop(row.name)
            if new_value is not None:
                row.value = new_value
        for name, new_value in pending.items():
            if new_value is None:
                continue
            session.add(SysInfo(name=name, value=new_value))
def scan_dir(pid, root, force=False, linkpath=None, batch=None):
    """Walk ``root`` and queue every file and directory into a scan batch.

    Directories that are symbolic links are followed by a recursive call on
    their real path, sharing the same batch, when ``force`` is set or when
    no ``FileInfo`` row exists for them yet. A link whose real path is a
    string prefix of ``root`` is skipped. The batch is saved when the walk
    ends, whether it finished normally or not.

    Args:
        pid: Scanner process id, handed to ``ScanBatch``.
        root: Directory to walk.
        force: Passed to ``ScanBatch``; also makes already known symlinked
            directories get rescanned.
        linkpath: Passed through to ``make_fileinfo`` and
            ``batch.init_dirs``; recursive calls set it to the
            ``dirname``/``name`` of the link being followed.
        batch: Existing batch to append to; a new ``ScanBatch`` is created
            when this is falsy.
    """
    if not batch:
        batch = ScanBatch(pid, force)
    try:
        for current, subdirs, filenames in os.walk(root):
            batch.init_dirs(root, current, subdirs, linkpath)
            if os.path.islink(current):
                logger.warning("link to: %s" % current)
                return

            for filename in filenames:
                entry = make_fileinfo(
                    os.path.join(current, filename), root, linkpath)
                if not entry:
                    continue
                entry['ftype'] = 'F'
                batch.add_file(entry)

            for subdir in subdirs:
                if subdir == '':
                    continue
                fullname = os.path.join(current, subdir)
                entry = make_fileinfo(fullname, root, linkpath)
                if entry is None:
                    continue
                entry['ftype'] = 'D'
                batch.add_file(entry)

                # Only symlinked directories need the extra handling below.
                if not os.path.islink(fullname):
                    continue
                with DBSession(auto_commit=True) as db:
                    known = db.orm.query(FileInfo.id).filter(
                        FileInfo.dirname == entry['dirname'],
                        FileInfo.name == entry['name']).first()
                    notexists = known is None
                if root.startswith(entry['realname']):
                    continue  # skip link to ancestor
                if not (force or notexists):
                    continue
                scan_dir(pid,
                         entry['realname'],
                         force,
                         os.path.join(entry['dirname'], entry['name']),
                         batch=batch)
    finally:
        batch.save_batch()
def save_batch(self):
    """Write the queued file records to the database and empty the queue.

    Each queued entry is looked up by ``dirname`` and ``name``; ``add_file``
    is called for it when ``self.force`` is set or no row exists yet. An
    entry whose name cannot be encoded as UTF-8 is logged and skipped; any
    other error is logged and stops processing of the remaining entries.

    Returns:
        The result of ``gen_checksum()`` plus the number of entries that
        were queued when the call started.
    """
    queued = self.batch
    count = len(queued)
    with DBSession() as db:
        for fileinfo in queued:
            try:
                # Encoding the name first surfaces names that cannot be
                # represented as UTF-8 before the database is touched.
                str(fileinfo['name']).encode('utf-8')
                existing = db.orm.query(FileInfo).filter(
                    FileInfo.dirname == fileinfo['dirname'],
                    FileInfo.name == fileinfo['name']).first()
                if self.force or not existing:
                    add_file(db.orm, existing, pid=self.pid, **fileinfo)
            except UnicodeEncodeError:
                logger.error(
                    "Unicode error: {dirname}/{name}".format(**fileinfo))
            except Exception as e:
                logger.error(format_exc())
                logger.error("Error : {dirname}/{name} {error}".format(
                    error=str(e), **fileinfo))
                break
    self.batch = []
    return gen_checksum() + count
def get_sysinfo(key):
    """Return the value stored in ``SysInfo`` under ``key``.

    Args:
        key: Name of the entry to read.

    Returns:
        The stored value, or ``None`` when no entry with that name exists.
    """
    with DBSession(auto_commit=True) as db:
        row = db.orm.query(SysInfo.value).filter(SysInfo.name == key).first()
        if not row:
            return None
        return row.value