def compare_depfinder_audits(gx):
    # This really needs to be all the python packages, since this doesn't
    # cover outputs
    python_nodes = {n for n, v in gx.nodes("payload") if "python" in v.get("req", "")}
    python_nodes.update(
        [
            k
            for node_name, node in gx.nodes("payload")
            for k in node.get("outputs_names", [])
            if node_name in python_nodes
        ],
    )

    bad_inspection = {}
    files = os.listdir("audits/depfinder")
    if "_net_audit.json" in files:
        files.pop(files.index("_net_audit.json"))

    for node, attrs in gx.nodes("payload"):
        if (
            attrs.get("version", None) is None
            or attrs.get("archived", False)
            or attrs.get("bad", False)
        ):
            continue
        if "requirements" not in attrs:
            print("node %s doesn't have requirements!" % node, flush=True)
            continue
        node_version = f"{node}_{attrs['version']}"
        # construct the expected filename
        expected_filename = f"{node_version}.json"
        if expected_filename in files:
            with open(os.path.join("audits/depfinder", expected_filename)) as f:
                output = load(f)
            if isinstance(output, str) or "traceback" in output:
                bad_inspection[node_version] = output
                continue
            d = extract_missing_packages(
                required_packages=output.get("required", {}),
                questionable_packages=output.get("questionable", {}),
                run_packages=attrs["requirements"]["run"],
                node=node,
                python_nodes=python_nodes,
            )
            bad_inspection[node_version] = d or False

    with open("audits/depfinder/_net_audit.json", "w") as f:
        dump(bad_inspection, f)
    return bad_inspection
def compare_depfinder_audits(gx):
    # This really needs to be all the python packages, since this doesn't
    # cover outputs
    python_nodes = {
        n for n, v in gx.nodes("payload") if "python" in v.get("req", "")
    }
    python_nodes.update(
        [
            k
            for node_name, node in gx.nodes("payload")
            for k in node.get("outputs_names", [])
            if node_name in python_nodes
        ],
    )
    imports_by_package, packages_by_import = create_package_import_maps(
        python_nodes,
        # set(gx.nodes)
    )

    bad_inspection = {}
    files = os.listdir("audits/depfinder")
    if "_net_audit.json" in files:
        files.pop(files.index("_net_audit.json"))

    for node, attrs in gx.nodes("payload"):
        if not attrs.get("version"):
            continue
        node_version = f"{node}_{attrs['version']}"
        # construct the expected filename
        expected_filename = f"{node_version}.json"
        if expected_filename in files:
            with open(os.path.join("audits/depfinder", expected_filename)) as f:
                output = load(f)
            if isinstance(output, str):
                bad_inspection[node_version] = output
                continue
            d = extract_missing_packages(
                required_imports=output.get("required", set()),
                questionable_imports=output.get("questionable", set()),
                run_packages=attrs["requirements"]["run"],
                package_by_import=packages_by_import,
                import_by_package=imports_by_package,
                node=node,
                nodes=python_nodes,  # set(gx.nodes)
            )
            bad_inspection[node_version] = d or False

    with open("audits/depfinder/_net_audit.json", "w") as f:
        dump(bad_inspection, f)
    return bad_inspection
def compare_grayskull_audits(gx):
    grayskull_files = os.listdir("audits/grayskull")
    bad_inspections = {}
    if "_net_audit.json" in grayskull_files:
        grayskull_files.pop(grayskull_files.index("_net_audit.json"))
        with open("audits/grayskull/_net_audit.json") as f:
            bad_inspections = load(f)

    futures = {}
    with executor("dask", max_workers=20) as pool:
        for node, attrs in gx.nodes("payload"):
            if not attrs.get("version"):
                continue
            node_version = f"{node}_{attrs['version']}"
            if node_version in bad_inspections:
                continue
            # construct the expected filename
            expected_filename = f"{node_version}.yml"
            if expected_filename in grayskull_files:
                with open(
                    os.path.join("audits/grayskull", expected_filename),
                ) as f:
                    meta_yaml = f.read()
                futures[
                    pool.submit(
                        inner_grayskull_comparison,
                        meta_yaml=meta_yaml,
                        attrs=attrs,
                        node=node,
                    )
                ] = node_version
        for future in as_completed(futures):
            try:
                bad_inspections[futures[future]] = future.result()
            except Exception as e:
                bad_inspections[futures[future]] = str(e)

    with open("audits/grayskull/_net_audit.json", "w") as f:
        dump(bad_inspections, f)
    return bad_inspections
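# The function above keys the futures dict by the Future object itself so
# that as_completed() can map each finished result back to its node_version.
# A minimal, self-contained sketch of that pattern, using the stdlib
# ThreadPoolExecutor in place of the bot's dask-backed `executor` helper
# (the `check` function and package names below are hypothetical):
from concurrent.futures import ThreadPoolExecutor, as_completed


def check(name):
    # stand-in for inner_grayskull_comparison
    return f"checked {name}"


results = {}
futures = {}
with ThreadPoolExecutor(max_workers=4) as pool:
    for name in ["pkg-a", "pkg-b"]:
        # key the dict by the future so the result loop can recover the name
        futures[pool.submit(check, name)] = name
    for future in as_completed(futures):
        try:
            results[futures[future]] = future.result()
        except Exception as e:
            # record the failure instead of letting one bad node abort the run
            results[futures[future]] = str(e)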
def compare_depfinder_audits(gx):
    bad_inspection = {}
    files = os.listdir("audits/depfinder")
    if "_net_audit.json" in files:
        files.pop(files.index("_net_audit.json"))

    for node, attrs in gx.nodes("payload"):
        if not attrs.get("version"):
            continue
        node_version = f"{node}_{attrs['version']}"
        # construct the expected filename
        expected_filename = f"{node_version}.json"
        if expected_filename in files:
            with open(os.path.join("audits/depfinder", expected_filename)) as f:
                output = load(f)
            if isinstance(output, str):
                bad_inspection[node_version] = output
                continue
            quest = output.get("questionable", set())
            required_pkgs = output.get("required", set())
            d = {}
            run_req = attrs["requirements"]["run"]
            excludes = {
                node,
                node.replace("-", "_"),
                node.replace("_", "-"),
                "python",
                "setuptools",
            }
            cf_minus_df = run_req - required_pkgs - excludes - quest
            if cf_minus_df:
                d.update(cf_minus_df=cf_minus_df)
            df_minus_cf = required_pkgs - run_req - excludes
            if df_minus_cf:
                d.update(df_minus_cf=df_minus_cf)
            bad_inspection[node_version] = d or False

    with open("audits/depfinder/_net_audit.json", "w") as f:
        dump(bad_inspection, f)
    return bad_inspection
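# The comparison above is plain set arithmetic: cf_minus_df collects run
# requirements that conda-forge declares but depfinder never saw (candidates
# for removal), while df_minus_cf collects packages depfinder found that the
# recipe omits (candidates for addition). A hedged, standalone illustration
# with made-up package data -- none of this is real audit output:
run_req = {"python", "numpy", "requests", "example-pkg"}  # recipe's run deps
required_pkgs = {"numpy", "scipy"}  # packages depfinder says are imported
quest = {"requests"}  # depfinder's "questionable" findings
node = "example-pkg"
excludes = {
    node,
    node.replace("-", "_"),
    node.replace("_", "-"),
    "python",
    "setuptools",
}

cf_minus_df = run_req - required_pkgs - excludes - quest
df_minus_cf = required_pkgs - run_req - excludes

print(cf_minus_df)  # set(): every declared dep is accounted for
print(df_minus_cf)  # {'scipy'}: a likely missing run requirement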
def main(args):
    gx = load_graph()
    ctx = MigratorSessionContext("", "", "")
    start_time = time.time()

    # limit graph to things that depend on python
    python_des = nx.descendants(gx, "python")
    for node in sorted(
        python_des,
        key=lambda x: (len(nx.descendants(gx, x)), x),
        reverse=True,
    ):
        if time.time() - int(env.get("START_TIME", start_time)) > int(
            env.get("TIMEOUT", 60 * 30),
        ):
            break
        # depfinder only works on python at the moment, so only audit things
        # with python as a runtime dep
        os.makedirs("audits", exist_ok=True)
        with gx.nodes[node]["payload"] as payload:
            version = payload.get("version", None)
            if (
                not payload.get("archived", False)
                and version
                and "python" in payload["requirements"]["run"]
                and f"{node}_{version}.json" not in os.listdir("audits")
            ):
                print(node)
                fctx = FeedstockContext(
                    package_name=node,
                    feedstock_name=payload["name"],
                    attrs=payload,
                )
                try:
                    deps = audit_feedstock(fctx, ctx)
                except Exception as e:
                    deps = {
                        "exception": str(e),
                        "traceback": str(traceback.format_exc()).split("\n"),
                    }
                finally:
                    with open(f"audits/{node}_{version}.json", "w") as f:
                        dump(deps, f)
def main(args):
    gx = load_graph()
    ctx = MigratorSessionContext("", "", "")
    start_time = time.time()

    os.makedirs("audits", exist_ok=True)
    for k, v in AUDIT_REGISTRY.items():
        audit_dir = os.path.join("audits", k)
        version_path = os.path.join(audit_dir, "_version.json")
        audit_version = "_".join([v["version"], v["creation_version"]])
        if os.path.exists(version_path):
            with open(version_path) as f:
                version = load(f)
            # if the version of the code that generated the audits differs
            # from the current one, clear out the old audit data so we always
            # work from data written by the latest version
            if version != audit_version:
                shutil.rmtree(audit_dir)
        os.makedirs(audit_dir, exist_ok=True)
        with open(version_path, "w") as f:
            dump(audit_version, f)

    # TODO: generalize for cran skeleton
    # limit graph to things that depend on python
    python_des = nx.descendants(gx, "python")
    for node in sorted(
        python_des,
        key=lambda x: (len(nx.descendants(gx, x)), x),
        reverse=True,
    ):
        if time.time() - int(env.get("START_TIME", start_time)) > int(
            env.get("TIMEOUT", 60 * RUNTIME_MINUTES),
        ):
            break
        # depfinder only works on python at the moment, so only audit things
        # with python as a runtime dep
        payload = gx.nodes[node]["payload"]
        for k, v in AUDIT_REGISTRY.items():
            version = payload.get("version", None)
            ext = v["ext"]
            if (
                not payload.get("archived", False)
                and not payload.get("bad", False)
                and version
                and "python" in payload["requirements"]["run"]
                and f"{node}_{version}.{ext}" not in os.listdir(f"audits/{k}")
            ):
                fctx = FeedstockContext(
                    package_name=node,
                    feedstock_name=payload["feedstock_name"],
                    attrs=payload,
                )
                try:
                    deps = v["run"](fctx, ctx)
                except Exception as e:
                    deps = {
                        "exception": str(e),
                        "traceback": str(traceback.format_exc()).split("\n"),
                    }
                    if "dumper" in v:
                        deps = v["dumper"](deps)
                finally:
                    if deps:
                        with open(f"audits/{k}/{node}_{version}.{ext}", "w") as f:
                            v["writer"](deps, f)

    # grayskull_audit_outcome = compare_grayskull_audits(gx)
    # compute_grayskull_accuracy(grayskull_audit_outcome)
    depfinder_audit_outcome = compare_depfinder_audits(gx)
    compute_depfinder_accuracy(depfinder_audit_outcome)
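# main() drives everything through AUDIT_REGISTRY; its shape is implied by
# the lookups above ("version", "creation_version", "ext", "run", "writer",
# and an optional "dumper"). A hedged sketch of what one entry might look
# like -- the version strings, the stand-in audit callable, and the json
# writer are all illustrative assumptions, not the bot's actual registry:
import json


def run_depfinder_audit(fctx, ctx):
    # hypothetical stand-in for the real depfinder audit callable
    return {"required": [], "questionable": []}


AUDIT_REGISTRY = {
    "depfinder": {
        "version": "1",  # bump to invalidate previously written audits
        "creation_version": "1",  # version of the code that wrote them
        "ext": "json",  # audits land at audits/depfinder/<node>_<version>.json
        "run": run_depfinder_audit,  # callable(fctx, ctx) -> audit result
        "writer": lambda result, f: json.dump(result, f),  # (result, file handle)
        # "dumper": ...,  # optional extra serialization step (see main above)
    },
}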
import glob

import tqdm

from conda_forge_tick.utils import load, dump
from conda_forge_tick.git_utils import trim_pr_json_keys

fnames = glob.glob("pr_json/*.json")
print("found %d json files" % len(fnames), flush=True)

for fname in tqdm.tqdm(fnames):
    with open(fname) as fp:
        pr_json = load(fp)

    pr_json = trim_pr_json_keys(pr_json)

    with open(fname, "w") as fp:
        dump(pr_json, fp)