def test_convert_pdf_page_to_web(tmp_fixture_dir):
    """Convert single page pdf to web version"""
    source_pdf = Path(tmp_fixture_dir) / 'PDF' / PAGE_ONE

    # First call creates the web version at the default location.
    web_pdf = convert_pdf_to_web(source_pdf)
    assert web_pdf == source_pdf.parent / 'WEB' / PAGE_ONE
    assert web_pdf.exists()

    # Conversion must actually change the file content (size differs).
    assert web_pdf.stat().st_size != source_pdf.stat().st_size

    # A second call with an unchanged source reuses the existing output.
    first_mtime = web_pdf.stat().st_mtime
    assert convert_pdf_to_web(source_pdf).stat().st_mtime == first_mtime

    # Touching the source invalidates the cache and forces a re-conversion.
    source_pdf.touch()
    assert convert_pdf_to_web(source_pdf).stat().st_mtime > first_mtime

    # A nonexistent source must raise FileNotFoundError.
    missing = source_pdf.with_name('nonexisting')
    assert not missing.is_file()
    with pytest.raises(FileNotFoundError):
        convert_pdf_to_web(missing)
def generate(db, base_dir: str, conf_common: PVConf, conf_branches: BranchesConf, force: bool):
    """Regenerate the APT ``dists`` tree for every configured branch.

    The new tree is staged under ``<base_dir>/dists.new`` and then swapped
    into ``<base_dir>/dists``; the previous tree is parked at
    ``<base_dir>/dists.old`` during the swap and removed afterwards.

    Args:
        db: open database connection (``%s``-style paramstyle cursors).
        conf_common: settings shared by all branches; per-branch settings
            from ``conf_branches`` override them.
        conf_branches: per-branch configuration keyed by branch name.
        force: when True, never skip a branch even if it looks up to date.
    """
    dist_dir = base_dir + '/dists.new'      # staging area for the new tree
    pool_dir = base_dir + '/pool'
    dist_dir_real = base_dir + '/dists'     # live tree currently served
    dist_dir_old = base_dir + '/dists.old'  # previous tree, kept during swap
    # Drop leftovers from an earlier aborted run.
    shutil.rmtree(dist_dir, ignore_errors=True)
    for key in conf_branches.keys():
        i = PosixPath(pool_dir).joinpath(key)
        # Only branches that actually have a pool directory get generated.
        if not i.is_dir():
            continue
        branch_name = i.name
        realbranchdir = os.path.join(dist_dir_real, branch_name)
        inrel = PosixPath(realbranchdir).joinpath('InRelease')
        # "renew_in" is in days (default 1); converted to seconds for
        # comparison against the time left until the release expires.
        expire_renewal_period = timedelta(days=conf_branches[branch_name].get(
            "renew_in", 1)).total_seconds()
        if not force and inrel.is_file():
            # See if we can skip this branch altogether
            inrel_mtime = inrel.stat().st_mtime
            inrel_sec_to_expire = get_valid_until_from_release(
                inrel) - datetime.now().timestamp()
            cur = db.cursor()
            cur.execute(
                "SELECT coalesce(extract(epoch FROM max(mtime)), 0) "
                "FROM pv_repos WHERE branch=%s", (branch_name, ))
            db_mtime = cur.fetchone()[0]
            cur.close()
            # Skip if
            # - P-vector does not recognize this branch (usually means branch is empty)
            # OR On-disk release mtime is newer than last time db was updated
            #    AND On-disk release won't expire in 1 day
            # NOTE(review): Python precedence makes this
            # `not db_mtime or (inrel_mtime > db_mtime and inrel_sec_to_expire > ...)`,
            # i.e. an unrecognized branch is skipped regardless of expiry —
            # this matches the comment above, but confirm it is intended.
            if not db_mtime or inrel_mtime > db_mtime and inrel_sec_to_expire > expire_renewal_period:
                # Reuse the previously generated branch output verbatim.
                shutil.copytree(realbranchdir, os.path.join(dist_dir, branch_name))
                logger_rel.info('Skip generating Packages and Contents for %s',
                                branch_name)
                continue
        component_name_list = []
        for j in PosixPath(pool_dir).joinpath(branch_name).iterdir():
            if not j.is_dir():
                continue
            component_name = j.name
            component_name_list.append(component_name)
            logger_rel.info('Generating Packages for %s-%s',
                            branch_name, component_name)
            gen_packages(db, dist_dir, branch_name, component_name)
            logger_rel.info('Generating Contents for %s-%s',
                            branch_name, component_name)
            gen_contents(db, branch_name, component_name, dist_dir)
        # Branch-level Release: common settings overridden by branch settings.
        conf = conf_common.copy()
        conf.update(conf_branches[branch_name])
        logger_rel.info('Generating Release for %s', branch_name)
        gen_release(db, branch_name, component_name_list, dist_dir, conf)
    # Swap the freshly staged tree into place, then drop the old tree
    # (second positional arg to rmtree is ignore_errors).
    if PosixPath(dist_dir_real).exists():
        os.rename(dist_dir_real, dist_dir_old)
    os.rename(dist_dir, dist_dir_real)
    shutil.rmtree(dist_dir_old, True)
def get_item_dict(self, item: PosixPath, relative: Optional[PosixPath] = None) -> Dict[str, Any]:
    """Build a metadata dict describing a file or directory entry.

    Args:
        item: path to describe; must exist (``stat`` is called on it).
        relative: when given, adds a ``relative_path`` key with *item*
            expressed relative to ``relative.parent``.

    Returns:
        Dict with ``name``/``full_path``/``type``/``size``/``date``;
        files additionally get ``suffix`` and ``used`` (whether the path
        is referenced in ``self.db_files``), folders get an empty
        ``files`` list. Side effect: records the path in
        ``self.found_files``.
    """
    is_file: bool = item.is_file()
    # Fix: stat() was called twice (two syscalls, and size/date could come
    # from different states if the file changed in between). Stat once.
    stat_result = item.stat()
    _dict: Dict[str, Any] = {
        "name": item.name,
        "full_path": str(item),
        "type": "file" if is_file else "folder",
        "size": stat_result.st_size,
        # NOTE(review): st_ctime is inode-change time on Unix, not
        # creation time — confirm that is the intended "date".
        "date": datetime.fromtimestamp(stat_result.st_ctime).date(),
    }
    if is_file:
        _dict["suffix"] = item.suffix
        _dict["used"] = str(item) in self.db_files
    else:
        _dict["files"] = []
    if relative:
        # relative.parent is already a Path; the former Path(...) wrapper
        # around it was redundant.
        _dict["relative_path"] = str(item.relative_to(relative.parent))
    self.found_files.add(str(item))
    return _dict
def read_stream(self, stream_name, fn: PosixPath):
    """Copy the named stream to the local file *fn*, showing progress.

    Reads 1 KiB chunks; the tqdm bar advances one tick per MiB written.
    Because the stream length is unknown up front, the bar's total starts
    at 100 ticks and is extended in increments of 100 as needed.

    Args:
        stream_name: key into ``self._streams``.
        fn: destination path for the downloaded bytes.
    """
    stream = self._streams[stream_name]
    with stream.open() as fd:
        with open(fn, 'wb') as file:
            total = 100
            pbar = tqdm(total=total)
            data_loaded = 0  # number of 1 KiB reads written so far
            try:
                while True:
                    data = fd.read(1024)
                    if not data:
                        break
                    file.write(data)
                    data_loaded += 1
                    if data_loaded % 1024 == 0:  # one tick per MiB
                        pbar.update(1)
                        pbar.set_description(
                            f"loaded: {data_loaded // 1024}Mb")
                        if data_loaded // 1024 == total - 1:
                            # About to hit the provisional total: grow it.
                            total += 100
                            pbar.total = total
            finally:
                # Fix: the progress bar was never closed, leaving a
                # dangling display handle that garbles later output.
                pbar.close()
    file_size = fn.stat().st_size
    print(f"loaded file: {fn}, size: {file_size // 1024 // 1024}Mb")
def _get_date(self, file_path: PosixPath, date: str = "") -> str:
    """Return *date* if it is non-empty; otherwise the file's last
    modification date formatted as ``YYYY-MM-DD`` (local time)."""
    if not date:
        mtime = file_path.stat().st_mtime
        date = time.strftime('%Y-%m-%d', time.localtime(mtime))
    return date
def scan_dir(db, base_dir: str, branch: str, component: str, branch_idx: int):
    """Synchronize the database with the on-disk .deb pool for one
    branch/component.

    Three phases:
      1. Walk what the DB already knows and classify each file as
         unchanged (ignored), replaced (duplicate), or deleted.
      2. Scan the pool directory for .deb files not seen in phase 1 and
         parse them in a thread pool.
      3. Insert/update package rows and publish IPC change events;
         finally bump mtime on every repo that was touched.

    Args:
        db: open database connection (``%s`` paramstyle).
        base_dir: repository root containing ``pool``.
        branch, component: which pool subtree to scan.
        branch_idx: ordering index stored in ``pv_repos``.
    """
    pool_path = PosixPath(base_dir).joinpath('pool')
    search_path = pool_path.joinpath(branch).joinpath(component)
    compname = '%s-%s' % (branch, component)   # e.g. "stable-main", used in IPC
    comppath = '%s/%s' % (branch, component)   # e.g. "stable/main", matches pv_repos.path
    cur = db.cursor()
    # Known packages plus previously superseded duplicates for this component.
    cur.execute("""SELECT p.package, p.version, p.repo, p.architecture,
          p.filename, p.size, p.mtime, p.sha256
        FROM pv_packages p
        INNER JOIN pv_repos r ON p.repo=r.name WHERE r.path=%s
        UNION ALL
        SELECT p.package, p.version, p.repo, p.architecture,
          p.filename, p.size, p.mtime, p.sha256
        FROM pv_package_duplicate p
        INNER JOIN pv_repos r ON p.repo=r.name WHERE r.path=%s""",
        (comppath, comppath))
    dup_pkgs = set()       # filenames that exist but whose content changed
    ignore_files = set()   # on-disk paths already up to date in the DB
    modified_repo = set()  # repos whose mtime must be bumped at the end
    del_list = []          # (filename, package, version, repopath) rows to delete
    # For each package/version/architecture we already know in the DB:
    for package, version, repopath, architecture, filename, size, mtime, sha256 in cur:
        fullpath = PosixPath(base_dir).joinpath(filename)
        if fullpath.is_file():
            # If a package with the same name exists:
            stat = fullpath.stat()
            sfullpath = str(fullpath)
            if size == stat.st_size and (mtime == int(stat.st_mtime) or
                    # Ignore if the file isn't changed
                    internal_pkgscan.sha256_file(sfullpath) == sha256):
                ignore_files.add(sfullpath)
            else:
                # Consider the new file to be a duplicate and replace the old one
                dup_pkgs.add(filename)
                del_list.append((filename, package, version, repopath))
        else:
            # If the package has been deleted
            del_list.append((filename, package, version, repopath))
            logger_scan.info('CLEAN %s', filename)
            module_ipc.publish_change(
                compname, package, architecture, 'delete', version, '')
    # For each package/version/arch/repo to be deleted:
    for row in del_list:
        cur.execute("DELETE FROM pv_packages WHERE filename=%s", (row[0],))
        # row[1:][-1] is the repopath column of the deleted row.
        modified_repo.add(row[1:][-1])
    # Check if there are any new files added. Recursively scan the pool dir and take notes of
    # what we haven't seen yet.
    check_list = []
    for fullpath in search_path.rglob('*.deb'):
        if not fullpath.is_file():
            continue
        stat = fullpath.stat()
        sfullpath = str(fullpath)
        if sfullpath in ignore_files:
            continue
        check_list.append((sfullpath, str(fullpath.relative_to(base_dir)),
                           stat.st_size, int(stat.st_mtime)))
    del ignore_files
    # Parse the new/changed .deb files in parallel (thread pool; chunk size 5).
    with multiprocessing.dummy.Pool(max(1, os.cpu_count() - 1)) as mpool:
        for pkginfo, depinfo, sodeps, files in mpool.imap_unordered(scan_deb, check_list, 5):
            # Derive the repo name: 'all' -> 'noarch', and non-main
            # components are prefixed onto the architecture.
            realname = pkginfo['architecture']
            validdeb = ('debtime' in pkginfo)  # presumably False for malformed debs — confirm
            if realname == 'all':
                realname = 'noarch'
            if component != 'main':
                realname = component + '-' + realname
            repo = '%s/%s' % (realname, branch)
            cur.execute("INSERT INTO pv_repos VALUES (%s,%s,%s,%s,%s,%s,%s,now()) "
                        "ON CONFLICT DO NOTHING",
                        (repo, realname, comppath, branch_idx,
                         branch, component, pkginfo['architecture']))
            modified_repo.add(repo)
            pkginfo['repo'] = repo
            dbkey = (pkginfo['package'], pkginfo['version'], repo)
            if pkginfo['filename'] in dup_pkgs:
                # File replaced in place: its old row was deleted above.
                if validdeb:
                    logger_scan.info('UPDATE %s', pkginfo['filename'])
                    module_ipc.publish_change(
                        compname, pkginfo['package'], pkginfo['architecture'],
                        'overwrite', pkginfo['version'], pkginfo['version']
                    )
            else:
                # Compare with any existing version of the same package in this repo.
                cur.execute("SELECT version, filename FROM pv_packages "
                            "WHERE package=%s AND repo=%s", (pkginfo['package'], repo))
                results = cur.fetchall()
                if results:
                    oldver = max(results, key=lambda x: dpkg_vercomp_key(x[0]))
                    vercomp = internal_dpkg_version.dpkg_version_compare(
                        oldver[0], pkginfo['version'])
                    if vercomp == -1:
                        # New file is a newer version.
                        if validdeb:
                            logger_scan.info('NEWER %s %s %s >> %s',
                                             pkginfo['architecture'], pkginfo['package'],
                                             pkginfo['version'], oldver[0])
                            module_ipc.publish_change(
                                compname, pkginfo['package'], pkginfo['architecture'],
                                'upgrade', oldver[0], pkginfo['version']
                            )
                    elif vercomp:
                        # New file is an older version: keep it but warn.
                        logger_scan.warning('OLD %s %s %s', pkginfo['architecture'],
                                            pkginfo['package'], pkginfo['version'])
                    else:
                        # Same version, different file: move the old row to
                        # pv_package_duplicate and purge its detail tables.
                        cur.execute("DELETE FROM pv_package_sodep "
                                    "WHERE package=%s AND version=%s AND repo=%s", dbkey)
                        cur.execute("DELETE FROM pv_package_files "
                                    "WHERE package=%s AND version=%s AND repo=%s", dbkey)
                        cur.execute("DELETE FROM pv_package_dependencies "
                                    "WHERE package=%s AND version=%s AND repo=%s", dbkey)
                        cur.execute("DELETE FROM pv_package_duplicate "
                                    "WHERE package=%s AND version=%s AND repo=%s", dbkey)
                        cur.execute("INSERT INTO pv_package_duplicate "
                                    "SELECT * FROM pv_packages WHERE filename=%s",
                                    (oldver[1],))
                        cur.execute("DELETE FROM pv_packages "
                                    "WHERE package=%s AND version=%s AND repo=%s", dbkey)
                        logger_scan.error('DUP %s == %s', oldver[1], pkginfo['filename'])
                elif validdeb:
                    # Brand-new package for this repo.
                    logger_scan.info('NEW %s %s %s', pkginfo['architecture'],
                                     pkginfo['package'], pkginfo['version'])
                    module_ipc.publish_change(
                        compname, pkginfo['package'], pkginfo['architecture'],
                        'new', '', pkginfo['version']
                    )
            # Insert the freshly scanned package and its detail rows.
            keys, qms, vals = internal_db.make_insert(pkginfo)
            cur.execute("INSERT INTO pv_packages (%s) VALUES (%s)" % (keys, qms), vals)
            for row in depinfo.items():
                cur.execute("INSERT INTO pv_package_dependencies "
                            "VALUES (%s,%s,%s,%s,%s) "
                            "ON CONFLICT ON CONSTRAINT pv_package_dependencies_pkey "
                            "DO UPDATE SET value = %s", dbkey + row + (row[1],))
            for row in sodeps:
                cur.execute("INSERT INTO pv_package_sodep VALUES "
                            "(%s,%s,%s,%s,%s,%s)", dbkey + row)
            for row in files:
                cur.execute("INSERT INTO pv_package_files VALUES "
                            "(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)", dbkey + row)
    # Bump mtime on every repo we touched so release generation notices.
    for repo in modified_repo:
        cur.execute("UPDATE pv_repos SET mtime=now() WHERE name=%s", (repo,))