def getFeatureProject(graphJob: signac.Project.Job):
    """Return the signac project rooted at a graph job's workspace.

    The project is looked up at ``graphJob.workspace()`` with
    ``search=False``. When that lookup raises ``LookupError``, a project
    named ``"SyntheticExperimentFeatures"`` is initialised at the same root
    with ``"features"`` as its workspace.

    Args:
        graphJob: Job whose workspace directory is used as the project root.

    Returns:
        The existing or newly initialised project.
    """
    try:
        return signac.get_project(root=graphJob.workspace(), search=False)
    except LookupError:
        return signac.init_project(
            "SyntheticExperimentFeatures",
            root=graphJob.workspace(),
            workspace="features",
        )
def getSplitProject(featureJob: signac.Project.Job):
    """Return the signac project rooted at a feature job's workspace.

    An existing project at ``featureJob.workspace()`` (looked up with
    ``search=False``) is preferred. If the lookup raises ``LookupError``, a
    project named ``"SyntheticExperimentSplits"`` is initialised at that
    root, using ``"splits"`` as its workspace.

    Args:
        featureJob: Job whose workspace directory is used as the project root.

    Returns:
        The existing or newly initialised project.
    """
    try:
        return signac.get_project(root=featureJob.workspace(), search=False)
    except LookupError:
        return signac.init_project(
            "SyntheticExperimentSplits",
            root=featureJob.workspace(),
            workspace="splits",
        )
def clear_job(job: signac.Project.Job):
    """Delete everything in a job's workspace except its signac metadata.

    Every entry directly inside ``job.workspace()`` is removed, apart from
    ``signac_statepoint.json`` and ``signac_job_document.json``. Real
    directories are removed recursively; files and symbolic links are
    unlinked. A line is printed for each entry that gets deleted.

    Args:
        job: Job whose workspace directory is cleaned.
    """
    preserved = ("signac_statepoint.json", "signac_job_document.json")
    for child in Path(job.workspace()).iterdir():
        if child.name in preserved:
            continue
        # ``is_dir()`` follows symlinks, but ``shutil.rmtree`` refuses to
        # operate on a symbolic link. Only real directories are removed
        # recursively; a link is unlinked below, leaving its target intact.
        if child.is_dir() and not child.is_symlink():
            print(f"Deleting directory {child}")
            shutil.rmtree(str(child))
        else:
            print(f"Deleting {child}")
            child.unlink()
def _save_graph_outputs(generator, G, job):
    """Save the graph, label and networkx outputs of ``G`` into the job workspace."""
    workspace = job.workspace()
    generator.save_graph(G, workspace, job.sp.graphName)
    generator.save_y(G, workspace, job.sp.graphName)
    generator.save_nx_graph(G, workspace, job.sp.graphName)


def _init_unmodified_feature_job(job):
    """Open and initialise the ``"unmodified"`` feature job under ``job``."""
    featureProject = utils.signac_tools.getFeatureProject(job)
    return featureProject.open_job({"feature_type": "unmodified"}).init()


def _save_unmodified_features(featureJob, job, features):
    """Store ``features`` as a CSR ``.npz`` file and record it in the job document."""
    output_name = f"{job.sp.graphName}-unmodified.allx.npz"
    allx = scipy.sparse.csr_matrix(features)
    scipy.sparse.save_npz(featureJob.fn(output_name), allx)
    featureJob.doc["feature_file"] = output_name
    featureJob.doc["feature_name"] = f"{job.sp.datasetName}-unmodified"
    featureJob.doc["succeeded"] = True


def _single_label_color(label_row, row_index):
    """Return the 1-based position of the only nonzero entry of ``label_row``."""
    color = np.nonzero(label_row)[0] + 1
    assert len(color) == 1, (
        f"row {row_index} must have exactly one nonzero label, got colors {color}"
    )
    return color[0]


def _load_source_graph(job):
    """Build a networkx graph with ``"color"`` node attributes from ``source_graph/``.

    Reads the ``.graph``, ``.ally``, ``.ty`` and ``.test.index`` files named
    after ``job.sp.source_name`` inside the job's ``source_graph`` directory.
    """
    graph_path, ally_path, ty_path, test_index_path = (
        job.fn("source_graph/{}{}".format(job.sp.source_name, suffix))
        for suffix in (".graph", ".ally", ".ty", ".test.index")
    )

    # SECURITY: unpickling (here and via ``allow_pickle=True`` below) can run
    # arbitrary code; the source files must come from a trusted location.
    with open(graph_path, "rb") as graph_file:
        adjacency = pickle.load(graph_file)
    G = nx.from_dict_of_lists(adjacency)
    ally = np.load(ally_path, allow_pickle=True)
    ty = np.load(ty_path, allow_pickle=True)

    with open(test_index_path, "r") as index_file:
        test_node_ids = [int(line.strip()) for line in index_file]
    # One test index line is expected per row of ``ty``.
    assert len(test_node_ids) == ty.shape[0], (
        f"{len(test_node_ids)} test indices for {ty.shape[0]} rows of ty"
    )

    attrs = dict()
    for i in range(ally.shape[0]):
        attrs[i] = {"color": _single_label_color(ally[i, :], i)}
    # Rows of ``ty`` are matched to node ids by their order in the index file;
    # an id that already appeared above is overwritten.
    for i, node_id in enumerate(test_node_ids):
        attrs[node_id] = {"color": _single_label_color(ty[i, :], i)}

    # ``len(G)`` is the node count; ``G.node`` no longer exists in
    # networkx >= 2.4.
    assert len(attrs) == len(G), (
        f"{len(attrs)} labelled nodes but the graph has {len(G)} nodes"
    )
    nx.set_node_attributes(G, attrs)
    return G


def generate_graph(job: signac.Project.Job):
    """Generate or import the graph described by ``job.sp`` and save it.

    ``job.sp.method`` selects how the graph is obtained:

    * ``"mixhop"``: generated with ``graphgen.MixhopGraphGenerator``.
    * ``"planetoid"``: loaded with ``utils.PlanetoidData``; also initialises
      the ``"unmodified"`` feature job and a split job whose ``split_config``
      is derived from the dataset's train and test masks.
    * ``"GeomGCN"``: loaded with ``utils.GeomGCNData``; the dataset features
      are stored in the ``"unmodified"`` feature job.
    * ``"SparseGraph"``: loaded with ``sparsegraph.io.load_dataset``, then
      the optional ``preprocess`` commands from the statepoint are executed;
      the attribute matrix, when present, is stored as features.
    * ``"copy"``: rebuilt from the files in the job's ``source_graph``
      directory.

    In every case the graph, its labels and its networkx form are written to
    the job workspace under ``job.sp.graphName``.

    Args:
        job: Job whose statepoint describes the graph and whose workspace
            receives the output files.

    Raises:
        ValueError: If ``job.sp.method`` is not one of the methods above.
    """
    print("Generating graph for job {}".format(job.get_id()))
    graphgen.random_state = reset_random_state(job)

    if job.sp.method == "mixhop":
        generator = graphgen.MixhopGraphGenerator(
            job.sp.classRatio, job.sp.heteroClsWeight,
            heteroWeightsExponent=job.sp.heteroWeightsExponent)
        G = generator(job.sp.numNode, job.sp.m, job.sp.m0, job.sp.h)
        _save_graph_outputs(generator, G, job)

    elif job.sp.method == "planetoid":
        # The loader is given the relative path "data_source", so it is
        # called inside ``with job:``.
        # NOTE(review): only the load is kept inside the ``with`` block here
        # and in the branches below - confirm that no later step depends on
        # the working directory.
        with job:
            dataset = utils.PlanetoidData(job.sp.datasetName, "data_source")
        G = dataset.getNXGraph()
        generator = graphgen.GraphGenerator(job.sp.numClass)
        _save_graph_outputs(generator, G, job)

        featureJob = _init_unmodified_feature_job(job)
        splitProject = utils.signac_tools.getSplitProject(featureJob)
        # Presumably ``y_all`` is one-hot, which makes this the number of
        # training nodes per class - TODO confirm.
        trainCounts = dataset.y_all[dataset.train_mask].sum(0)
        if len(np.unique(trainCounts)) == 1:
            # Every column has the same count: record it as "<count>c".
            trainSetSize = "{}c".format(int(trainCounts[0]))
        else:
            trainSetSize = int(dataset.train_mask.sum())
        # Only the side effect of initialising the split job is needed.
        splitProject.open_job({
            "split_config": "{}__{}".format(
                trainSetSize, int(dataset.test_mask.sum()))
        }).init()

    elif job.sp.method == "GeomGCN":
        with job:
            dataset = utils.GeomGCNData(job.sp.datasetName, "data_source")
        G = dataset.getNXGraph()
        generator = graphgen.GraphGenerator(job.sp.numClass)
        _save_graph_outputs(generator, G, job)

        featureJob = _init_unmodified_feature_job(job)
        _save_unmodified_features(featureJob, job, dataset.features)

    elif job.sp.method == "SparseGraph":
        with job:
            spgraph = sparsegraph.io.load_dataset(
                str(Path("data_source") / job.sp.datasetName))
        # SECURITY: the statepoint's "preprocess" entries are executed
        # verbatim; they must come from a trusted source. They run in this
        # function's scope, so the names ``spgraph`` and ``job`` must stay.
        # NOTE(review): in Python 3, exec() cannot rebind a function local,
        # so a command that assigns to ``spgraph`` has no effect below.
        for command in job.sp.get("preprocess", []):
            exec(command)
        G = spgraph.getNXGraph()
        generator = graphgen.GraphGenerator(job.sp.numClass)
        _save_graph_outputs(generator, G, job)

        featureJob = _init_unmodified_feature_job(job)
        if spgraph.attr_matrix is not None:
            # Generate features
            _save_unmodified_features(featureJob, job, spgraph.attr_matrix)

    elif job.sp.method == "copy":
        G = _load_source_graph(job)
        generator = graphgen.GraphGenerator(job.sp.numClass)
        _save_graph_outputs(generator, G, job)

    else:
        raise ValueError("Unknown generation method {}".format(job.sp.method))
def getModelProject(splitJob: signac.Project.Job, modelRoot: str):
    """Return the signac project located at ``modelRoot`` under a split job.

    The project root is ``modelRoot`` joined onto ``splitJob.workspace()``.
    The project is looked up with ``search=False`` and is not created here,
    so whatever ``signac.get_project`` raises for a missing project
    propagates to the caller.

    Args:
        splitJob: Job whose workspace directory contains the model project.
        modelRoot: Path of the model project relative to that workspace.

    Returns:
        The project found at the joined path.
    """
    model_dir = Path(splitJob.workspace()).joinpath(modelRoot)
    return signac.get_project(root=str(model_dir), search=False)