def resolve_output_dir_path(
    input_path: PosixPath,
    input_root_dir: PosixPath,
    output_root_dir: PosixPath,
) -> PosixPath:
    """Map *input_path* from under *input_root_dir* to under *output_root_dir*.

    Example: (/in/a/b.txt, /in, /out) -> /out/a/b.txt

    Raises:
        ValueError: if *input_path* is not located under *input_root_dir*
            (propagated from ``PurePath.relative_to``).
    """
    # relative_to() accepts any path-like (str or Path) directly, so the
    # original pathlib.Path(...) re-wrap of input_root_dir was redundant.
    relative = input_path.relative_to(input_root_dir)
    # Keep the explicit Path() wrap on the output root: it tolerates callers
    # that pass a plain string, matching the original behavior.
    return pathlib.Path(output_root_dir) / relative
def get_item_dict(self, item: PosixPath, relative: Optional[PosixPath] = None) -> Dict[str, Any]:
    """Describe *item* (an existing file or directory) as a plain dict.

    Keys always present: ``name``, ``full_path``, ``type`` ("file"/"folder"),
    ``size`` (bytes), ``date`` (creation/metadata-change date).
    Files additionally get ``suffix`` and ``used`` (whether the path is known
    in ``self.db_files``); folders get an empty ``files`` list.
    If *relative* is given, ``relative_path`` is *item* relative to
    *relative*'s parent directory.

    Side effect: records ``str(item)`` into ``self.found_files``.

    Raises:
        OSError: if *item* does not exist (from ``stat()``).
    """
    # FIX: stat() was called twice (size and ctime), costing an extra syscall
    # and risking inconsistent values if the file changed between the calls.
    stat = item.stat()
    is_file: bool = item.is_file()
    _dict: Dict[str, Any] = {
        "name": item.name,
        "full_path": str(item),
        "type": "file" if is_file else "folder",
        "size": stat.st_size,
        # NOTE(review): st_ctime is metadata-change time on Unix, creation
        # time only on Windows — confirm which the callers expect.
        "date": datetime.fromtimestamp(stat.st_ctime).date(),
    }
    if is_file:
        _dict["suffix"] = item.suffix
        _dict["used"] = str(item) in self.db_files
    else:
        _dict["files"] = []  # caller presumably fills children in later
    if relative:
        _dict["relative_path"] = str(
            item.relative_to(Path(relative.parent)))
    self.found_files.add(str(item))
    return _dict
def scan_dir(db, base_dir: str, branch: str, component: str, branch_idx: int):
    """Reconcile the on-disk ``pool/<branch>/<component>`` tree with the DB.

    Three phases:
      1. For every package row already recorded for this component, classify
         it as unchanged (skip later), replaced on disk (duplicate), or
         removed from disk (purge and announce a delete).
      2. Rescan the pool for ``.deb`` files not marked unchanged; scan them in
         a thread pool and (re-)insert their metadata, handling version
         comparison and same-version duplicates.
      3. Bump the mtime of every repo whose contents changed.

    NOTE(review): relies on module-level collaborators defined elsewhere in
    this project (internal_pkgscan, scan_deb, module_ipc, logger_scan,
    internal_db, internal_dpkg_version, dpkg_vercomp_key).
    """
    pool_path = PosixPath(base_dir).joinpath('pool')
    search_path = pool_path.joinpath(branch).joinpath(component)
    compname = '%s-%s' % (branch, component)  # IPC channel name, e.g. 'stable-main'
    comppath = '%s/%s' % (branch, component)  # matches pv_repos.path, e.g. 'stable/main'
    cur = db.cursor()
    # Everything the DB already knows for this component: live rows plus
    # rows previously shelved as duplicates.
    cur.execute("""SELECT p.package, p.version, p.repo, p.architecture, p.filename, p.size, p.mtime, p.sha256 FROM pv_packages p INNER JOIN pv_repos r ON p.repo=r.name WHERE r.path=%s UNION ALL SELECT p.package, p.version, p.repo, p.architecture, p.filename, p.size, p.mtime, p.sha256 FROM pv_package_duplicate p INNER JOIN pv_repos r ON p.repo=r.name WHERE r.path=%s""", (comppath, comppath))
    dup_pkgs = set()       # filenames present on disk but differing from the DB record
    ignore_files = set()   # absolute paths already up to date; skipped in phase 2
    modified_repo = set()  # repo names needing an mtime bump in phase 3
    del_list = []          # (filename, package, version, repopath) rows to purge
    # For each package/version/architecture we already know in the DB:
    for package, version, repopath, architecture, filename, size, mtime, sha256 in cur:
        fullpath = PosixPath(base_dir).joinpath(filename)
        if fullpath.is_file():
            # If a package with the same name exists:
            stat = fullpath.stat()
            sfullpath = str(fullpath)
            # Cheap size+mtime check first; only hash the file when the
            # mtime moved but the content may still be identical.
            if size == stat.st_size and (mtime == int(stat.st_mtime) or
                    # Ignore if the file isn't changed
                    internal_pkgscan.sha256_file(sfullpath) == sha256):
                ignore_files.add(sfullpath)
            else:
                # Consider the new file to be a duplicate and replace the old one
                dup_pkgs.add(filename)
                del_list.append((filename, package, version, repopath))
        else:
            # If the package has been deleted
            del_list.append((filename, package, version, repopath))
            logger_scan.info('CLEAN %s', filename)
            module_ipc.publish_change(
                compname, package, architecture, 'delete', version, '')
    # For each package/version/arch/repo to be deleted:
    for row in del_list:
        cur.execute("DELETE FROM pv_packages WHERE filename=%s", (row[0],))
        modified_repo.add(row[1:][-1])  # row[1:][-1] == row[-1], the repo path
    # Check if there are any new files added. Recursively scan the pool dir and
    # take notes of what we haven't seen yet.
    check_list = []
    for fullpath in search_path.rglob('*.deb'):
        if not fullpath.is_file():
            continue
        stat = fullpath.stat()
        sfullpath = str(fullpath)
        if sfullpath in ignore_files:
            continue
        check_list.append((sfullpath, str(fullpath.relative_to(base_dir)),
                           stat.st_size, int(stat.st_mtime)))
    del ignore_files  # may be large; release before the parallel scan
    # Thread pool (multiprocessing.dummy), leaving one CPU free; scan_deb
    # results arrive unordered in chunks of 5.
    with multiprocessing.dummy.Pool(max(1, os.cpu_count() - 1)) as mpool:
        for pkginfo, depinfo, sodeps, files in mpool.imap_unordered(scan_deb, check_list, 5):
            realname = pkginfo['architecture']
            # presumably 'debtime' is set only for well-formed debs — TODO confirm
            validdeb = ('debtime' in pkginfo)
            if realname == 'all':
                realname = 'noarch'
            if component != 'main':
                realname = component + '-' + realname
            repo = '%s/%s' % (realname, branch)
            # Ensure the repo row exists (idempotent insert).
            cur.execute("INSERT INTO pv_repos VALUES (%s,%s,%s,%s,%s,%s,%s,now()) "
                        "ON CONFLICT DO NOTHING",
                        (repo, realname, comppath, branch_idx,
                         branch, component, pkginfo['architecture']))
            modified_repo.add(repo)
            pkginfo['repo'] = repo
            dbkey = (pkginfo['package'], pkginfo['version'], repo)
            if pkginfo['filename'] in dup_pkgs:
                # File replaced in place; its old row was purged above.
                if validdeb:
                    logger_scan.info('UPDATE %s', pkginfo['filename'])
                    module_ipc.publish_change(
                        compname, pkginfo['package'], pkginfo['architecture'],
                        'overwrite', pkginfo['version'], pkginfo['version']
                    )
            else:
                cur.execute("SELECT version, filename FROM pv_packages "
                            "WHERE package=%s AND repo=%s",
                            (pkginfo['package'], repo))
                results = cur.fetchall()
                if results:
                    # Compare against the newest version already recorded.
                    oldver = max(results, key=lambda x: dpkg_vercomp_key(x[0]))
                    vercomp = internal_dpkg_version.dpkg_version_compare(
                        oldver[0], pkginfo['version'])
                    if vercomp == -1:
                        # Strictly newer than what we have: announce an upgrade.
                        if validdeb:
                            logger_scan.info('NEWER %s %s %s >> %s',
                                             pkginfo['architecture'], pkginfo['package'],
                                             pkginfo['version'], oldver[0])
                            module_ipc.publish_change(
                                compname, pkginfo['package'], pkginfo['architecture'],
                                'upgrade', oldver[0], pkginfo['version']
                            )
                    elif vercomp:
                        # Older than the recorded version: keep it, but warn.
                        logger_scan.warning('OLD %s %s %s', pkginfo['architecture'],
                                            pkginfo['package'], pkginfo['version'])
                    else:
                        # Same version, different file: shelve the old row into
                        # pv_package_duplicate and drop its detail rows so the
                        # new file's rows can be inserted below.
                        cur.execute("DELETE FROM pv_package_sodep "
                                    "WHERE package=%s AND version=%s AND repo=%s", dbkey)
                        cur.execute("DELETE FROM pv_package_files "
                                    "WHERE package=%s AND version=%s AND repo=%s", dbkey)
                        cur.execute("DELETE FROM pv_package_dependencies "
                                    "WHERE package=%s AND version=%s AND repo=%s", dbkey)
                        cur.execute("DELETE FROM pv_package_duplicate "
                                    "WHERE package=%s AND version=%s AND repo=%s", dbkey)
                        cur.execute("INSERT INTO pv_package_duplicate "
                                    "SELECT * FROM pv_packages WHERE filename=%s",
                                    (oldver[1],))
                        cur.execute("DELETE FROM pv_packages "
                                    "WHERE package=%s AND version=%s AND repo=%s", dbkey)
                        logger_scan.error('DUP %s == %s', oldver[1], pkginfo['filename'])
                elif validdeb:
                    # First time this package appears in this repo.
                    logger_scan.info('NEW %s %s %s', pkginfo['architecture'],
                                     pkginfo['package'], pkginfo['version'])
                    module_ipc.publish_change(
                        compname, pkginfo['package'], pkginfo['architecture'],
                        'new', '', pkginfo['version']
                    )
            # (Re-)insert the package row and its detail tables.
            keys, qms, vals = internal_db.make_insert(pkginfo)
            cur.execute("INSERT INTO pv_packages (%s) VALUES (%s)" % (keys, qms), vals)
            for row in depinfo.items():
                cur.execute("INSERT INTO pv_package_dependencies "
                            "VALUES (%s,%s,%s,%s,%s) "
                            "ON CONFLICT ON CONSTRAINT pv_package_dependencies_pkey "
                            "DO UPDATE SET value = %s", dbkey + row + (row[1],))
            for row in sodeps:
                cur.execute("INSERT INTO pv_package_sodep VALUES "
                            "(%s,%s,%s,%s,%s,%s)", dbkey + row)
            for row in files:
                cur.execute("INSERT INTO pv_package_files VALUES "
                            "(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)", dbkey + row)
    # Phase 3: bump mtime so downstream index generation notices the change.
    for repo in modified_repo:
        cur.execute("UPDATE pv_repos SET mtime=now() WHERE name=%s", (repo,))
config_index = "./config_index.csv"

# Read the list of config files/dirs to back up (columns: path, is_private).
config_list = pd.read_csv(config_index)

# For every listed config entry, mirror it under config_dest.
# NOTE(review): config_dest is defined elsewhere in this file — not visible here.
for index, row in config_list.iterrows():
    path = PosixPath(row['path'])
    private = row['is_private'] == 1

    # Destination resolving: absolute paths map 1:1, "~"-relative paths go
    # under HOME/, other relative paths are expanded then mirrored.
    expanded_path = path.expanduser()
    if path.is_absolute():
        dest = config_dest + "/" + str(path)
    elif path.parts[0] == "~":
        dest = config_dest + "/HOME/" + str(path.relative_to("~"))
    else:
        # BUG FIX: original did `config_dest + expendad_path`, which raises
        # TypeError (str + PosixPath) and lacked a separator; convert the
        # path explicitly and join with "/".
        dest = config_dest + "/" + str(expanded_path)
    dest_dir = os.path.dirname(dest)

    # Backup
    if expanded_path.exists():
        # Make parent dir if necessary
        os.makedirs(dest_dir, exist_ok=True)
        # Make dummy placeholders for private files
        if private:
            dest = dest + "_DUMMY"
            if expanded_path.is_file():
                print("DUMMY backup file : " + str(path) + ", to : " + dest)
                PosixPath(dest).touch()
            elif expanded_path.is_dir():
                # NOTE(review): only logs for private dirs — no placeholder is
                # created; confirm whether that is intentional.
                print("DUMMY backup dir : " + str(path) + ", to : " + dest)