def add_package(session, pkg, pkgdir, file_table):
    global conf
    logging.debug('add-package %s' % pkg)

    sumsfile = sums_path(pkgdir)
    sumsfile_tmp = sumsfile + '.new'

    def emit_checksum(out, relpath, abspath):
        if os.path.islink(abspath) or not os.path.isfile(abspath):
            # Do not checksum symlinks; if they are not dangling / external we
            # will checksum their target anyhow. Do not checksum special files
            # either; they shouldn't be there per policy, but they might be
            # (and they are in old releases)
            return
        sha256 = hashutil.sha256sum(abspath)
        out.write('%s %s\n' % (sha256, relpath))

    if 'hooks.fs' in conf['backends']:
        if not os.path.exists(sumsfile):  # compute checksums only if needed
            with open(sumsfile_tmp, 'w') as out:
                for (relpath, abspath) in \
                        fs_storage.walk_pkg_files(pkgdir, file_table):
                    emit_checksum(out, relpath, abspath)
            os.rename(sumsfile_tmp, sumsfile)

    if 'hooks.db' in conf['backends']:
        db_package = db_storage.lookup_package(session, pkg['package'],
                                               pkg['version'])
        insert_q = sql.insert(Checksum.__table__)
        insert_params = []
        if not session.query(Checksum) \
                      .filter_by(package_id=db_package.id) \
                      .first():
            # ASSUMPTION: if *a* checksum of this package has already
            # been added to the db in the past, then *all* of them have,
            # as additions are part of the same transaction
            for (sha256, relpath) in parse_checksums(sumsfile):
                params = {'package_id': db_package.id, 'sha256': sha256}
                if file_table:
                    try:
                        file_id = file_table[relpath]
                        params['file_id'] = file_id
                    except KeyError:
                        continue
                else:
                    file_ = session.query(File) \
                                   .filter_by(package_id=db_package.id,
                                              path=relpath) \
                                   .first()
                    if not file_:
                        continue
                    params['file_id'] = file_.id
                insert_params.append(params)
                if len(insert_params) >= BULK_FLUSH_THRESHOLD:
                    session.execute(insert_q, insert_params)
                    session.flush()
                    insert_params = []
            if insert_params:  # source packages shouldn't be empty but...
                session.execute(insert_q, insert_params)
                session.flush()
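# `parse_checksums` is called above but not defined in this excerpt. The
# following is a minimal sketch, not the upstream implementation: it simply
# inverts the format that `emit_checksum` writes, i.e. one
# '<sha256> <relpath>' line per regular file.
def parse_checksums(sumsfile):
    """Yield (sha256, relpath) pairs read from a checksums file."""
    with open(sumsfile) as f:
        for line in f:
            line = line.rstrip('\n')
            if not line:
                continue
            sha256, relpath = line.split(' ', 1)
            yield (sha256, relpath)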
def add_package(session, pkg, pkgdir, file_table):
    global conf
    logging.debug('add-package %s' % pkg)

    license_file = license_path(pkgdir)
    license_file_tmp = license_file + '.new'

    def emit_license(out, session, package, version, relpath, pkgdir):
        """Retrieve the license of a file. We use `relpath` because we want
        the path inside the package directory, which is what the paragraphs
        of d/copyright files use.
        """
        # join the path to the debian/copyright file, as we are already in
        # the sources directory
        synopsis = helper.get_license(session, package, version, relpath,
                                      os.path.join(pkgdir,
                                                   'debian/copyright'))
        if synopsis is not None:
            s = '%s\t%s\n' % (synopsis, relpath.decode('utf-8'))
            out.write(s)

    if 'hooks.fs' in conf['backends']:
        if not os.path.exists(license_file):  # run license only if needed
            with io.open(license_file_tmp, 'w', encoding='utf-8') as out:
                for (relpath, abspath) in \
                        fs_storage.walk_pkg_files(pkgdir, file_table):
                    emit_license(out, session, pkg['package'], pkg['version'],
                                 relpath, pkgdir)
            os.rename(license_file_tmp, license_file)

    if 'hooks.db' in conf['backends']:
        licenses = parse_license_file(license_file)
        db_package = db_storage.lookup_package(session, pkg['package'],
                                               pkg['version'])
        if not session.query(FileCopyright) \
                      .join(File) \
                      .filter(File.package_id == db_package.id) \
                      .first():
            # ASSUMPTION: if *a* license of this package has already been
            # added to the db in the past, then *all* of them have, as
            # additions are part of the same transaction
            for (synopsis, path) in licenses:
                if file_table:
                    try:
                        file_id = file_table[path]
                    except KeyError:
                        continue
                else:
                    file_ = session.query(File) \
                                   .filter_by(package_id=db_package.id,
                                              path=path) \
                                   .first()
                    if not file_:
                        continue
                    file_id = file_.id
                license = FileCopyright(file_id, 'debian', synopsis)
                session.add(license)
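# Similarly, `parse_license_file` is called above but not defined here. A
# minimal sketch, assuming the tab-separated format that `emit_license`
# writes ('<synopsis>\t<relpath>' per file); the upstream implementation
# may differ.
def parse_license_file(license_file):
    """Yield (synopsis, path) pairs read from a license cache file."""
    with io.open(license_file, encoding='utf-8') as f:
        for line in f:
            line = line.rstrip('\n')
            if not line:
                continue
            synopsis, path = line.split('\t', 1)
            yield (synopsis, path)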
def add_package(session, pkg, pkgdir, file_table): global conf logging.debug("add-package %s" % pkg) sumsfile = sums_path(pkgdir) sumsfile_tmp = sumsfile + ".new" def emit_checksum(out, relpath, abspath): if os.path.islink(abspath) or not os.path.isfile(abspath): # Do not checksum symlinks, if they are not dangling / external we # will checksum their target anyhow. Do not check special files # either; they shouldn't be there per policy, but they might be # (and they are in old releases) return sha256 = hashutil.sha256sum(abspath) out.write("%s %s\n" % (sha256, relpath)) if "hooks.fs" in conf["backends"]: if not os.path.exists(sumsfile): # compute checksums only if needed with open(sumsfile_tmp, "w") as out: for (relpath, abspath) in fs_storage.walk_pkg_files(pkgdir, file_table): emit_checksum(out, relpath, abspath) os.rename(sumsfile_tmp, sumsfile) if "hooks.db" in conf["backends"]: db_package = db_storage.lookup_package(session, pkg["package"], pkg["version"]) insert_q = sql.insert(Checksum.__table__) insert_params = [] if not session.query(Checksum).filter_by(package_id=db_package.id).first(): # ASSUMPTION: if *a* checksum of this package has already # been added to the db in the past, then *all* of them have, # as additions are part of the same transaction for (sha256, relpath) in parse_checksums(sumsfile): params = {"package_id": db_package.id, "sha256": sha256} if file_table: try: file_id = file_table[relpath] params["file_id"] = file_id except KeyError: continue else: file_ = session.query(File).filter_by(package_id=db_package.id, path=relpath).first() if not file_: continue params["file_id"] = file_.id insert_params.append(params) if len(insert_params) >= BULK_FLUSH_THRESHOLD: session.execute(insert_q, insert_params) session.flush() insert_params = [] if insert_params: # source packages shouldn't be empty but... session.execute(insert_q, insert_params) session.flush()
def add_package(session, pkg, pkgdir, sticky=False): """Add `pkg` (a `debmirror.SourcePackage`) to the DB. If `sticky` is set, also set the corresponding bit in the versions table. Return the package file table, which maps relative (file) path within the extracted package to file identifiers pointing into the `models.File` table. Suitable usages of the file table include: - DB cache to avoid re-fetching all file IDs - FS cache to avoid re-scanning package dir to iterate over file names """ logging.debug('add to db %s...' % pkg) package_name = session.query(PackageName) \ .filter_by(name=pkg['package']) \ .first() if not package_name: package_name = PackageName(pkg['package']) session.add(package_name) db_package = session.query(Package) \ .filter_by(version=pkg['version'], name_id=package_name.id) \ .first() if not db_package: db_package = Package(pkg['version'], package_name, sticky) db_package.area = pkg.archive_area() if 'vcs-browser' in pkg: db_package.vcs_browser = pkg['vcs-browser'] for vcs_type in VCS_TYPES: vcs_key = 'vcs-' + vcs_type if vcs_key in pkg: db_package.vcs_type = vcs_type db_package.vcs_url = pkg[vcs_key] package_name.versions.append(db_package) session.add(db_package) session.flush() # to get a version.id, needed by File below # add individual source files to the File table file_table = {} for (relpath, _abspath) in fs_storage.walk_pkg_files(pkgdir): file_ = File(db_package, relpath) session.add(file_) session.flush() file_table[relpath] = file_.id return file_table