def __init__(self, root, name):
    super(OGBNDataset, self).__init__(root)
    dataset = NodePropPredDataset(name, root)
    graph, y = dataset[0]
    x = torch.tensor(graph["node_feat"])
    y = torch.tensor(y.squeeze())
    # Coalesce duplicate edges, then drop self-loops.
    row, col, edge_attr = coalesce(graph["edge_index"][0], graph["edge_index"][1], graph["edge_feat"])
    edge_index = torch.stack([row, col], dim=0)
    edge_index, edge_attr = remove_self_loops(edge_index, edge_attr)
    # Make the graph undirected by adding the reverse of every edge.
    row = torch.cat([edge_index[0], edge_index[1]])
    col = torch.cat([edge_index[1], edge_index[0]])
    edge_index = torch.stack([row, col], dim=0)
    if edge_attr is not None:
        edge_attr = torch.cat([edge_attr, edge_attr], dim=0)
    self.data = Graph(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
    self.data.num_nodes = graph["num_nodes"]
    assert self.data.num_nodes == self.data.x.shape[0]

    # Split: convert the OGB index splits into boolean node masks.
    split_index = dataset.get_idx_split()
    self.data.train_mask = torch.zeros(self.data.num_nodes, dtype=torch.bool)
    self.data.test_mask = torch.zeros(self.data.num_nodes, dtype=torch.bool)
    self.data.val_mask = torch.zeros(self.data.num_nodes, dtype=torch.bool)
    self.data.train_mask[split_index["train"]] = True
    self.data.test_mask[split_index["test"]] = True
    self.data.val_mask[split_index["valid"]] = True
    self.transform = None
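
# A minimal standalone sketch (toy tensors, not the dataset above) of the edge
# symmetrisation performed in __init__: every directed edge (u, v) gains a reverse
# edge (v, u), and edge attributes are duplicated to match.
import torch

edge_index_demo = torch.tensor([[0, 1, 2], [1, 2, 0]])
edge_attr_demo = torch.tensor([[1.0], [2.0], [3.0]])
row_demo = torch.cat([edge_index_demo[0], edge_index_demo[1]])
col_demo = torch.cat([edge_index_demo[1], edge_index_demo[0]])
edge_index_sym = torch.stack([row_demo, col_demo], dim=0)           # shape [2, 6]
edge_attr_sym = torch.cat([edge_attr_demo, edge_attr_demo], dim=0)  # shape [6, 1]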
def __init__(self, root, name):
    self.name = name
    from ogb.nodeproppred import NodePropPredDataset

    dataset = NodePropPredDataset(name=name, root=root)
    split_idx = dataset.get_idx_split()
    data = dataset[0]
    num_nodes = data[1].shape[0]
    edge = data[0]["edge_index"]
    if name == "ogbn-arxiv":
        # Convert ogbn-arxiv to an undirected graph by appending reversed edges.
        edge = np.concatenate([edge, edge[[1, 0]]], axis=1)
    self.graph = _C.Graph(edge_index=edge, num_nodes=num_nodes)
    train_idx, valid_idx, test_idx = split_idx["train"], split_idx["valid"], split_idx["test"]
    self.x = data[0]["node_feat"]
    self.y = data[1].squeeze()
    # Encode the split as integers on a single mask: 1 = train, 2 = test, 0 = other.
    self.train_mask = np.zeros(num_nodes, np.int32)
    self.train_mask[train_idx] = 1
    self.train_mask[test_idx] = 2
    self.num_classes = dataset.num_classes
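
# Hedged helper sketch (toy array): recovering boolean masks from the integer split
# encoding used above, where 1 marks train nodes, 2 marks test nodes, and everything
# else (including the unassigned valid_idx) stays 0. The names below are illustrative.
import numpy as np

split_code_demo = np.array([1, 0, 2, 1, 0], dtype=np.int32)
train_mask_bool = split_code_demo == 1
test_mask_bool = split_code_demo == 2
print(train_mask_bool)   # [ True False False  True False]
print(test_mask_bool)    # [False False  True False False]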
def ogbn_dataset_to_general_static_graph(
    cls,
    ogbn_dataset: NodePropPredDataset,
    nodes_label_key: str,
    nodes_data_key_mapping: _typing.Optional[_typing.Mapping[str, str]] = ...,
    edges_data_key_mapping: _typing.Optional[_typing.Mapping[str, str]] = ...,
    graph_data_key_mapping: _typing.Optional[_typing.Mapping[str, str]] = ...,
) -> GeneralStaticGraph:
    split_idx = ogbn_dataset.get_idx_split()
    return cls.ogbn_data_to_general_static_graph(
        ogbn_dataset[0][0],
        ogbn_dataset[0][1],
        nodes_label_key,
        split_idx["train"],
        split_idx["valid"],
        split_idx["test"],
        nodes_data_key_mapping,
        edges_data_key_mapping,
        graph_data_key_mapping,
    )
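
# Hedged note (standalone sketch): NodePropPredDataset[0] yields a (graph, labels) pair,
# which is what the ogbn_dataset[0][0] / ogbn_dataset[0][1] indexing above unpacks.
# The keys printed below are typical for homogeneous node-property datasets such as
# ogbn-arxiv; running this will download the dataset into the given root.
from ogb.nodeproppred import NodePropPredDataset

demo_dataset = NodePropPredDataset(name="ogbn-arxiv", root="dataset")
demo_graph, demo_labels = demo_dataset[0]
print(demo_graph.keys())   # typically: edge_index, edge_feat, node_feat, num_nodes, ...
print(demo_labels.shape)   # (num_nodes, 1)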
# log_out.write(args)
print(args, file=log_out, flush=True)
epochs = args.epoch
node_dim = args.node_dim
num_channels = args.num_channels
lr = args.lr
weight_decay = args.weight_decay
num_layers = args.num_layers
norm = args.norm
adaptive_lr = args.adaptive_lr

if args.ogb_mag:
    print("Using OGB MAG", flush=True)
    dataset = NodePropPredDataset(name="ogbn-mag")
    split_idx = dataset.get_idx_split()
    train_idx, valid_idx, test_idx = split_idx["train"], split_idx["valid"], split_idx["test"]
    graph, label = dataset[0]  # graph: library-agnostic graph object

    AvsI = graph['edge_index_dict'][('author', 'affiliated_with', 'institution')]
    AvsP = graph['edge_index_dict'][('author', 'writes', 'paper')]
    PvsP = graph['edge_index_dict'][('paper', 'cites', 'paper')]
    PvsS = graph['edge_index_dict'][('paper', 'has_topic', 'field_of_study')]

    # empty_lists = [[] for _ in range(len(AvsI[0]))]
    # AvsIdict = dict(zip(AvsI[0], empty_lists))
    empty_lists = [[] for _ in range(len(AvsI[1]))]
    IvsAdict = dict(zip(AvsI[1], empty_lists))
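
# Hedged alternative sketch (toy ids, not the MAG data): the institution -> authors
# adjacency dict built above can also be grouped with collections.defaultdict, which
# avoids pre-allocating one empty list per edge.
from collections import defaultdict
import numpy as np

avs_i_demo = np.array([[0, 1, 1, 2],     # author ids (made up)
                       [5, 5, 6, 6]])    # institution ids (made up)
ivs_a_demo = defaultdict(list)
for author, institution in zip(avs_i_demo[0], avs_i_demo[1]):
    ivs_a_demo[int(institution)].append(int(author))
print(dict(ivs_a_demo))   # {5: [0, 1], 6: [1, 2]}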
def get_graph_data(d_name="ogbn-proteins", mini_data=False): """ Param: d_name: name of dataset mini_data: if mini_data==True, only use a small dataset (for test) """ # import ogb data dataset = NodePropPredDataset(name=d_name) num_tasks = dataset.num_tasks # obtaining the number of prediction tasks in a dataset split_idx = dataset.get_idx_split() train_idx, valid_idx, test_idx = split_idx["train"], split_idx[ "valid"], split_idx["test"] graph, label = dataset[0] # reshape graph["edge_index"] = graph["edge_index"].T # mini dataset if mini_data: graph['num_nodes'] = 500 mask = (graph['edge_index'][:, 0] < 500) * (graph['edge_index'][:, 1] < 500) graph["edge_index"] = graph["edge_index"][mask] graph["edge_feat"] = graph["edge_feat"][mask] label = label[:500] train_idx = np.arange(0, 400) valid_idx = np.arange(400, 450) test_idx = np.arange(450, 500) # read/compute node feature if mini_data: node_feat_path = './dataset/ogbn_proteins_node_feat_small.npy' else: node_feat_path = './dataset/ogbn_proteins_node_feat.npy' new_node_feat = None if os.path.exists(node_feat_path): print("Begin: read node feature".center(50, '=')) new_node_feat = np.load(node_feat_path) print("End: read node feature".center(50, '=')) else: print("Begin: compute node feature".center(50, '=')) start = time.perf_counter() for i in range(graph['num_nodes']): if i % 100 == 0: dur = time.perf_counter() - start print("{}/{}({}%), times: {:.2f}s".format( i, graph['num_nodes'], i / graph['num_nodes'] * 100, dur)) mask = (graph['edge_index'][:, 0] == i) current_node_feat = np.mean(np.compress(mask, graph['edge_feat'], axis=0), axis=0, keepdims=True) if i == 0: new_node_feat = [current_node_feat] else: new_node_feat.append(current_node_feat) new_node_feat = np.concatenate(new_node_feat, axis=0) print("End: compute node feature".center(50, '=')) print("Saving node feature in " + node_feat_path.center(50, '=')) np.save(node_feat_path, new_node_feat) print("Saving finish".center(50, '=')) print(new_node_feat) # create graph g = pgl.graph.Graph(num_nodes=graph["num_nodes"], edges=graph["edge_index"], node_feat={'node_feat': new_node_feat}, edge_feat=None) print("Create graph") print(g) return g, label, train_idx, valid_idx, test_idx, Evaluator(d_name)
dataset = OGB(ogb_dataset, transforms=[GCNFilter(), AdjToSpTensor()])
graph = dataset[0]
x, adj, y = graph.x, graph.a, graph.y

# Parameters
channels = 256                   # Number of channels for GCN layers
dropout = 0.5                    # Dropout rate for the features
learning_rate = 1e-2             # Learning rate
epochs = 200                     # Number of training epochs
N = dataset.n_nodes              # Number of nodes in the graph
F = dataset.n_node_features      # Original size of node features
n_out = ogb_dataset.num_classes  # OGB labels are sparse indices

# Data splits
idx = ogb_dataset.get_idx_split()
idx_tr, idx_va, idx_te = idx["train"], idx["valid"], idx["test"]
mask_tr = np.zeros(N, dtype=bool)
mask_va = np.zeros(N, dtype=bool)
mask_te = np.zeros(N, dtype=bool)
mask_tr[idx_tr] = True
mask_va[idx_va] = True
mask_te[idx_te] = True
masks = [mask_tr, mask_va, mask_te]

# Model definition
x_in = Input(shape=(F,))
a_in = Input((N,), sparse=True)
x_1 = GCNConv(channels, activation="relu")([x_in, a_in])
x_1 = BatchNormalization()(x_1)
x_1 = Dropout(dropout)(x_1)
        'y_true': y[te_mask],
        'y_pred': p[te_mask]
    })['rocauc']
    return tr_auc, va_auc, te_auc


# Load data
dataset_name = 'ogbn-proteins'
dataset = NodePropPredDataset(dataset_name)
evaluator = Evaluator(dataset_name)
graph, y = dataset[0]
X, A, _ = ogb.graph_to_numpy(graph)
N = A.shape[0]

# Data splits
idxs = dataset.get_idx_split()
tr_idx, va_idx, te_idx = idxs["train"], idxs["valid"], idxs["test"]
tr_mask = np.zeros(N, dtype=bool)
tr_mask[tr_idx] = True
va_mask = np.zeros(N, dtype=bool)
va_mask[va_idx] = True
te_mask = np.zeros(N, dtype=bool)
te_mask[te_idx] = True
masks = [tr_mask, va_mask, te_mask]

# Parameters
channels = 256
learning_rate = 1e-2
epochs = 200
es_patience = 200
F = X.shape[1]
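
# Hedged sketch (toy arrays, assumed shapes) of how the ogbn-proteins Evaluator used
# above scores predictions: it expects 'y_true' and 'y_pred' of shape
# [num_nodes, 112] (112 binary tasks) and returns the task-averaged ROC-AUC.
import numpy as np
from ogb.nodeproppred import Evaluator

evaluator_demo = Evaluator('ogbn-proteins')
y_true_demo = np.tile(np.array([[0], [1]]), (5, 112))   # alternate labels so each task has both classes
y_pred_demo = np.random.rand(10, 112)
print(evaluator_demo.eval({'y_true': y_true_demo, 'y_pred': y_pred_demo})['rocauc'])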
def process(self):
    dataset = NodePropPredDataset(name=self.name, root="./data")
    node_type_dict = {"paper": 0, "author": 1, "field_of_study": 2, "institution": 3}
    edge_type_dict = {
        ("paper", "cites", "paper"): 0,
        ("author", "affiliated_with", "institution"): 1,
        ("author", "writes", "paper"): 2,
        ("paper", "has_topic", "field_of_study"): 3,
    }
    num_nodes_dict = dataset[0][0]["num_nodes_dict"]
    num_nodes = torch.as_tensor(
        [0]
        + [
            num_nodes_dict["paper"],
            num_nodes_dict["author"],
            num_nodes_dict["field_of_study"],
            num_nodes_dict["institution"],
        ]
    )
    # Offsets that map per-type node ids into a single global id space.
    cum_num_nodes = torch.cumsum(num_nodes, dim=-1)
    node_types = torch.repeat_interleave(torch.arange(0, 4), num_nodes[1:])

    edge_index_dict = dataset[0][0]["edge_index_dict"]
    edge_index = [None] * len(edge_type_dict)
    edge_attr = [None] * len(edge_type_dict)

    i = 0
    for k, v in edge_index_dict.items():
        head, edge_type, tail = k
        head_offset = cum_num_nodes[node_type_dict[head]].item()
        tail_offset = cum_num_nodes[node_type_dict[tail]].item()
        src = v[0] + head_offset
        tgt = v[1] + tail_offset
        edge_tps = np.full(src.shape, edge_type_dict[k])
        if edge_type == "cites":
            # Paper-cites-paper edges are symmetrised within the same relation.
            _edges = torch.as_tensor([src, tgt])
            _src, _tgt = to_undirected(_edges).numpy()
            edge_tps = np.full(_src.shape, edge_type_dict[k])
            edge_idx = np.vstack([_src, _tgt])
        else:
            # Other relations get an explicit reverse relation with a new edge type.
            _src = np.concatenate([src, tgt])
            _tgt = np.concatenate([tgt, src])
            re_tps = np.full(src.shape, len(edge_type_dict))
            re_k = (tail, "to", head)
            edge_type_dict[re_k] = len(edge_type_dict)
            edge_tps = np.concatenate([edge_tps, re_tps])
            edge_idx = np.vstack([_src, _tgt])
        edge_index[i] = edge_idx
        edge_attr[i] = edge_tps
        assert edge_index[i].shape[1] == edge_attr[i].shape[0]
        i += 1

    edge_index = np.concatenate(edge_index, axis=-1)
    edge_index = torch.from_numpy(edge_index)
    edge_attr = torch.from_numpy(np.concatenate(edge_attr))
    assert edge_index.shape[1] == edge_attr.shape[0]

    split_index = dataset.get_idx_split()
    train_index = torch.from_numpy(split_index["train"]["paper"])
    val_index = torch.from_numpy(split_index["valid"]["paper"])
    test_index = torch.from_numpy(split_index["test"]["paper"])
    y = torch.as_tensor(dataset[0][1]["paper"]).view(-1)

    paper_feat = dataset[0][0]["node_feat_dict"]["paper"]

    data = Graph(
        y=y,
        edge_index=edge_index,
        edge_types=edge_attr,
        train_mask=train_index,
        val_mask=val_index,
        test_mask=test_index,
        node_types=node_types,
    )
    # self.save_edges(data)
    torch.save((data, node_type_dict, edge_type_dict, num_nodes_dict), self.processed_paths[0])
    np.save(self.processed_paths[1], paper_feat)
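
# Toy sketch (made-up counts) of the node-id offsetting used above: per-type local ids
# are shifted by the cumulative node counts of the preceding types so that every node
# gets a unique global id.
import torch

num_nodes_demo = torch.tensor([0, 3, 2, 4])         # sentinel, then counts per type
cum_demo = torch.cumsum(num_nodes_demo, dim=-1)     # tensor([0, 3, 5, 9])
author_local_ids = torch.tensor([0, 1])
author_global_ids = author_local_ids + cum_demo[1]  # type-1 nodes start after the 3 type-0 nodes
print(author_global_ids)                            # tensor([3, 4])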
def load_data(data_dir, dataset_str, knn_size=None, epsilon=None, knn_metric='cosine',
              prob_del_edge=None, prob_add_edge=None, seed=1234, sparse_init_adj=False):
    """
    Loads input data from gcn/data directory

    ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances
        (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object;
    ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object;
    ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object;
    ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict object;
    ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting as list object.

    All objects above must be saved using python pickle module.

    :param dataset_str: Dataset name ('cora', 'citeseer', 'pubmed')
    :return: All data input files loaded (as well the training/test data).
    """
    assert (knn_size is None) or (epsilon is None)

    if dataset_str.startswith('ogbn'):
        # Open Graph Benchmark datasets
        from ogb.nodeproppred import NodePropPredDataset
        dataset = NodePropPredDataset(name=dataset_str)
        split_idx = dataset.get_idx_split()
        idx_train = torch.LongTensor(split_idx["train"])
        idx_val = torch.LongTensor(split_idx["valid"])
        idx_test = torch.LongTensor(split_idx["test"])

        data = dataset[0]  # This dataset has only one graph
        features = torch.Tensor(data[0]['node_feat'])
        labels = torch.LongTensor(data[1]).squeeze(-1)

        edge_index = data[0]['edge_index']
        adj = to_undirected(edge_index, num_nodes=data[0]['num_nodes'])
        assert adj.diagonal().sum() == 0 and adj.max() <= 1 and (adj != adj.transpose()).sum() == 0

    else:
        # datasets: Cora, Citeseer, PubMed
        names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
        objects = []
        for i in range(len(names)):
            with open(os.path.join(data_dir, 'ind.{}.{}'.format(dataset_str, names[i])), 'rb') as f:
                if sys.version_info > (3, 0):
                    objects.append(pkl.load(f, encoding='latin1'))
                else:
                    objects.append(pkl.load(f))

        x, y, tx, ty, allx, ally, graph = tuple(objects)
        test_idx_reorder = parse_index_file(os.path.join(data_dir, 'ind.{}.test.index'.format(dataset_str)))
        test_idx_range = np.sort(test_idx_reorder)

        if dataset_str == 'citeseer':
            # Fix citeseer dataset (there are some isolated nodes in the graph)
            # Find isolated nodes, add them as zero-vecs into the right position
            test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1)
            tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
            tx_extended[test_idx_range - min(test_idx_range), :] = tx
            tx = tx_extended
            ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
            ty_extended[test_idx_range - min(test_idx_range), :] = ty
            ty = ty_extended

        raw_features = sp.vstack((allx, tx)).tolil()
        raw_features[test_idx_reorder, :] = raw_features[test_idx_range, :]
        features = normalize_features(raw_features)
        raw_features = torch.Tensor(raw_features.todense())
        features = torch.Tensor(features.todense())

        adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

        labels = np.vstack((ally, ty))
        labels[test_idx_reorder, :] = labels[test_idx_range, :]
        # labels = torch.LongTensor(np.where(labels)[1])
        labels = torch.LongTensor(np.argmax(labels, axis=1))

        idx_train = torch.LongTensor(range(len(y)))
        idx_val = torch.LongTensor(range(len(y), len(y) + 500))
        idx_test = torch.LongTensor(test_idx_range.tolist())

    if knn_size is not None:
        print('[ Using KNN-graph as input graph: {} ]'.format(knn_size))
        adj = kneighbors_graph(features, knn_size, metric=knn_metric, include_self=True)
        adj_norm = normalize_sparse_adj(adj)
        if sparse_init_adj:
            adj_norm = sparse_mx_to_torch_sparse_tensor(adj_norm)
        else:
            adj_norm = torch.Tensor(adj_norm.todense())

    elif epsilon is not None:
        print('[ Using Epsilon-graph as input graph: {} ]'.format(epsilon))
        feature_norm = features.div(torch.norm(features, p=2, dim=-1, keepdim=True))
        attention = torch.mm(feature_norm, feature_norm.transpose(-1, -2))
        mask = (attention > epsilon).float()
        adj = attention * mask
        adj = (adj > 0).float()
        adj = sp.csr_matrix(adj)
        adj_norm = normalize_sparse_adj(adj)
        if sparse_init_adj:
            adj_norm = sparse_mx_to_torch_sparse_tensor(adj_norm)
        else:
            adj_norm = torch.Tensor(adj_norm.todense())

    else:
        print('[ Using ground-truth input graph ]')
        if prob_del_edge is not None:
            adj = graph_delete_connections(prob_del_edge, seed, adj.toarray(), enforce_connected=False)
            adj = adj + np.eye(adj.shape[0])
            adj_norm = normalize_adj(torch.Tensor(adj))
            adj_norm = sp.csr_matrix(adj_norm)

        elif prob_add_edge is not None:
            adj = graph_add_connections(prob_add_edge, seed, adj.toarray(), enforce_connected=False)
            adj = adj + np.eye(adj.shape[0])
            adj_norm = normalize_adj(torch.Tensor(adj))
            adj_norm = sp.csr_matrix(adj_norm)

        else:
            adj = adj + sp.eye(adj.shape[0])
            adj_norm = normalize_sparse_adj(adj)
            if sparse_init_adj:
                adj_norm = sparse_mx_to_torch_sparse_tensor(adj_norm)
            else:
                adj_norm = torch.Tensor(adj_norm.todense())

    return adj_norm, features, labels, idx_train, idx_val, idx_test
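
# Standalone sketch (random toy features) of the epsilon-graph construction above:
# L2-normalise the features, threshold the pairwise cosine similarities, and binarise.
# The 0.5 below stands in for the epsilon argument and is not a value from the repo.
import torch

feats_demo = torch.randn(5, 8)
feats_demo = feats_demo / torch.norm(feats_demo, p=2, dim=-1, keepdim=True)
sim_demo = torch.mm(feats_demo, feats_demo.t())
eps_adj_demo = (sim_demo > 0.5).float()
print(eps_adj_demo)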
def get_graph_data(d_name="ogbn-proteins", mini_data=False): """ Param: d_name: name of dataset mini_data: if mini_data==True, only use a small dataset (for test) """ # 导入 ogb 数据 dataset = NodePropPredDataset(name=d_name) num_tasks = dataset.num_tasks # obtaining the number of prediction tasks in a dataset split_idx = dataset.get_idx_split() train_idx, valid_idx, test_idx = split_idx["train"], split_idx[ "valid"], split_idx["test"] graph, label = dataset[0] # 调整维度,符合 PGL 的 Graph 要求 graph["edge_index"] = graph["edge_index"].T # 使用小规模数据,500个节点 if mini_data: graph['num_nodes'] = 500 mask = (graph['edge_index'][:, 0] < 500) * (graph['edge_index'][:, 1] < 500) graph["edge_index"] = graph["edge_index"][mask] graph["edge_feat"] = graph["edge_feat"][mask] label = label[:500] train_idx = np.arange(0, 400) valid_idx = np.arange(400, 450) test_idx = np.arange(450, 500) # 输出 dataset 的信息 print(graph.keys()) print("节点个数 ", graph["num_nodes"]) print("节点最小编号", graph['edge_index'][0].min()) print("边个数 ", graph["edge_index"].shape[1]) print("边索引 shape ", graph["edge_index"].shape) print("边特征 shape ", graph["edge_feat"].shape) print("节点特征是 ", graph["node_feat"]) print("species shape", graph['species'].shape) print("label shape ", label.shape) # 读取/计算 node feature # 确定读取文件的路径 if mini_data: node_feat_path = './dataset/ogbn_proteins_node_feat_small.npy' else: node_feat_path = './dataset/ogbn_proteins_node_feat.npy' new_node_feat = None if os.path.exists(node_feat_path): # 如果文件存在,直接读取 print("读取 node feature 开始".center(50, '=')) new_node_feat = np.load(node_feat_path) print("读取 node feature 成功".center(50, '=')) else: # 如果文件不存在,则计算 # 每个节点 i 的特征为其邻边特征的均值 print("计算 node feature 开始".center(50, '=')) start = time.perf_counter() for i in range(graph['num_nodes']): if i % 100 == 0: dur = time.perf_counter() - start print("{}/{}({}%), times: {:.2f}s".format( i, graph['num_nodes'], i / graph['num_nodes'] * 100, dur)) mask = (graph['edge_index'][:, 0] == i) # 选择 i 的所有邻边 # 计算均值 current_node_feat = np.mean(np.compress(mask, graph['edge_feat'], axis=0), axis=0, keepdims=True) if i == 0: new_node_feat = [current_node_feat] else: new_node_feat.append(current_node_feat) new_node_feat = np.concatenate(new_node_feat, axis=0) print("计算 node feature 结束".center(50, '=')) print("存储 node feature 中,在" + node_feat_path.center(50, '=')) np.save(node_feat_path, new_node_feat) print("存储 node feature 结束".center(50, '=')) print(new_node_feat) # 构造 Graph 对象 g = pgl.graph.Graph(num_nodes=graph["num_nodes"], edges=graph["edge_index"], node_feat={'node_feat': new_node_feat}, edge_feat=None) print("创建 Graph 对象成功") print(g) return g, label, train_idx, valid_idx, test_idx, Evaluator(d_name)
def ogbn_generate_split(job: signac.Project.Job, splitJob: signac.Project.Job,
                        feature_graph_name, feature_graph_files):
    import constraint

    with utils.chdir(splitJob.sp.ogbn_path):
        from ogb.nodeproppred import NodePropPredDataset
        d_name = splitJob.sp.ogbn_name
        lock = ogbnLockDict.setdefault(splitJob.sp.ogbn_path, threading.Lock())
        if not os.path.exists("dataset"):
            # In case dataset is not downloaded
            lock.acquire()
            ogbnDataset = NodePropPredDataset(name=d_name)
            lock.release()
        else:
            ogbnDataset = NodePropPredDataset(name=d_name)
        split_idx = ogbnDataset.get_idx_split()
        train_idx, valid_idx, test_idx = split_idx["train"], split_idx["valid"], split_idx["test"]
        graph, label = ogbnDataset[0]

    with job:
        splitJobSrc = utils.signac_tools.access_proj_job(
            job, splitJob.sp.feature_source, splitJob.sp.split_source)
        splitSrcName = splitJobSrc.doc["split_name"]
        # Copy files that do not change.
        for source_file, dest_file in [
            (splitJobSrc.fn(f"{splitSrcName}.{ext}"), splitJob.fn(f"{feature_graph_name}.{ext}"))
            for ext in ('y', 'ty', 'ally', 'graph', 'test.index')
        ]:
            shutil.copy2(source_file, dest_file)
        with splitJobSrc:
            datasetSrc = utils.PlanetoidData(splitJobSrc.doc.split_name, ".", val_size=None)

        # Per-split label counts for the OGB dataset and for the source dataset.
        ogbnLabelCount = np.zeros((3, ogbnDataset.num_classes))
        ogbnLabelCount[0, :] = (label[train_idx] == np.arange(ogbnDataset.num_classes)).sum(0)
        ogbnLabelCount[1, :] = (label[valid_idx] == np.arange(ogbnDataset.num_classes)).sum(0)
        ogbnLabelCount[2, :] = (label[test_idx] == np.arange(ogbnDataset.num_classes)).sum(0)

        srcLabelCount = np.zeros((3, job.sp.numClass))
        srcLabelCount[0, :] = datasetSrc.y_all[datasetSrc.train_mask, :].sum(0)
        srcLabelCount[1, :] = datasetSrc.y_all[datasetSrc.val_mask, :].sum(0)
        srcLabelCount[2, :] = datasetSrc.y_all[datasetSrc.test_mask, :].sum(0)

        # CSP: map each source class to a distinct OGB class with enough samples per split.
        problem = constraint.Problem()
        problem.addVariables(range(job.sp.numClass), range(ogbnDataset.num_classes))
        problem.addConstraint(constraint.AllDifferentConstraint())
        for i in range(job.sp.numClass):
            # Bind i as a default argument so each constraint keeps its own class index.
            problem.addConstraint(
                lambda x, i=i: np.all(ogbnLabelCount[:, x] >= srcLabelCount[:, i]), (i,))
        solution = problem.getSolution()
        for srcClass, dstClass in solution.items():
            assert np.all(ogbnLabelCount[:, dstClass] >= srcLabelCount[:, srcClass])

        newFeatures = np.zeros((datasetSrc.num_samples, graph["node_feat"].shape[1]))
        for scope, idx in (("train", train_idx), ("val", valid_idx), ("test", test_idx)):
            scope_mask = getattr(datasetSrc, f"{scope}_mask")
            for srcClass, dstClass in solution.items():
                srcOpMask = np.logical_and(scope_mask, datasetSrc.labels == srcClass)
                dstSampleSet = list(set(idx).intersection(np.where(label == dstClass)[0]))
                sampleInds = random_state.choice(dstSampleSet, srcOpMask.sum(), replace=False)
                newFeatures[srcOpMask, :] = graph["node_feat"][sampleInds, :]

        x_mask = datasetSrc.train_mask
        allx_mask = (datasetSrc.train_mask + datasetSrc.val_mask)
        test_mask = datasetSrc.test_mask
        x = newFeatures[x_mask]
        allx = newFeatures[allx_mask]
        tx = newFeatures[test_mask]

        # .x; .tx; .allx
        pickle.dump(scipy.sparse.csr_matrix(x),
                    open(splitJob.fn(f"{feature_graph_name}.x"), "wb"))
        pickle.dump(scipy.sparse.csr_matrix(allx),
                    open(splitJob.fn(f"{feature_graph_name}.allx"), "wb"))
        pickle.dump(scipy.sparse.csr_matrix(tx),
                    open(splitJob.fn(f"{feature_graph_name}.tx"), "wb"))

        assert all(map(splitJob.isfile, feature_graph_files))
        splitJob.doc["succeeded"] = True
        splitJob.doc["split_name"] = feature_graph_name
        splitJob.doc.val_size = splitJobSrc.doc.val_size
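
# Toy sketch (made-up counts) of the class-matching CSP solved above with the
# python-constraint package: each source class must be assigned a distinct OGB class
# whose per-split label counts are at least as large.
import numpy as np
import constraint

ogbn_counts = np.array([[10, 5, 8],
                        [4, 2, 3],
                        [6, 1, 7]])        # rows: train/val/test, cols: OGB classes (made up)
src_counts = np.array([[5, 6],
                       [2, 3],
                       [1, 5]])            # rows: train/val/test, cols: source classes (made up)
problem_demo = constraint.Problem()
problem_demo.addVariables(range(src_counts.shape[1]), range(ogbn_counts.shape[1]))
problem_demo.addConstraint(constraint.AllDifferentConstraint())
for i in range(src_counts.shape[1]):
    # Default-argument binding keeps each constraint tied to its own source class.
    problem_demo.addConstraint(
        lambda x, i=i: bool(np.all(ogbn_counts[:, x] >= src_counts[:, i])), (i,))
print(problem_demo.getSolution())          # e.g. {0: 1, 1: 2}, or another feasible mapping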