class Runner:
    """Annotate or evaluate test datasets with a pretrained GNN.

    The constructor loads the data via ``load_data(params)``, builds the
    ``GNN`` model, restores its pretrained weights and moves it to the
    selected device. ``run`` then processes every dataset id listed in
    ``params.test_dataset`` and writes one CSV of predictions per dataset.

    Attributes read from ``params`` in this class: ``gpu``, ``evaluate``,
    ``dense_dim``, ``hidden_dim``, ``dropout``, ``test_dataset``,
    ``species``, ``tissue``, ``batch_size``, ``unsure_rate``, ``save_dir``.
    """

    def __init__(self, params):
        """Load data and the pretrained model described by ``params``.

        Args:
            params: Configuration object (see the class docstring for the
                attributes that are read).
        """
        self.params = params
        self.postfix = time.strftime('%d_%m_%Y') + '_' + time.strftime('%H:%M:%S')
        self.prj_path = Path(__file__).parent.resolve()
        self.device = torch.device('cpu' if self.params.gpu == -1 else f'cuda:{params.gpu}')
        # In evaluation mode ``load_data`` additionally returns ``map_dict``,
        # which ``evaluate_test`` uses to decide whether a prediction counts
        # as correct for a given ground-truth label.
        if self.params.evaluate:
            (self.total_cell, self.num_genes, self.num_classes, self.id2label,
             self.test_dict, self.map_dict, self.time) = load_data(params)
        else:
            (self.total_cell, self.num_genes, self.num_classes, self.id2label,
             self.test_dict, self.time) = load_data(params)
        # test_dict = {
        #     'graph': test_graph_dict,
        #     'nid': test_index_dict,
        #     'mask': test_mask_dict
        # }
        # This class also reads test_dict['origin_id'] and, in evaluation
        # mode, test_dict['label']; each entry is indexed by dataset id.
        self.model = GNN(in_feats=params.dense_dim,
                         n_hidden=params.hidden_dim,
                         n_classes=self.num_classes,
                         n_layers=1,
                         gene_num=self.num_genes,
                         activation=F.relu,
                         dropout=params.dropout)
        self.load_model()
        self.num_neighbors = self.total_cell + self.num_genes
        self.model.to(self.device)

    def run(self):
        """Predict (and optionally score) every test dataset, then save it."""
        for num in self.params.test_dataset:
            tic = time.time()
            if self.params.evaluate:
                correct, total, unsure, acc, pred = self.evaluate_test(num)
                print(
                    f"{self.params.species}_{self.params.tissue} #{num} Test Acc: {acc:.4f} ({correct}/{total}), Number of Unsure Cells: {unsure}"
                )
            else:
                pred = self.inference(num)
            toc = time.time()
            # ``self.time`` is the value returned by ``load_data``; it is added
            # to the per-dataset prediction time in the report.
            print(
                f'{self.params.species}_{self.params.tissue} #{num} Time Consumed: {toc - tic + self.time:.2f} seconds.'
            )
            self.save_pred(num, pred)

    def load_model(self):
        """Restore the pretrained weights for ``params.species``/``params.tissue``."""
        model_path = (self.prj_path / 'pretrained' / self.params.species / 'models'
                      / f'{self.params.species}-{self.params.tissue}.pt')
        state = torch.load(model_path, map_location=self.device)
        self.model.load_state_dict(state['model'])

    def _predict_probabilities(self, num):
        """Return softmax class probabilities for the masked nodes of dataset ``num``.

        Shared by ``inference`` and ``evaluate_test``, which previously each
        carried a verbatim copy of this forward pass.

        Returns:
            A 2-D numpy array with one row per node selected by
            ``test_dict['mask'][num]`` and one column per class.
        """
        self.model.eval()
        graph = self.test_dict['graph'][num]
        node_logits = torch.zeros((graph.number_of_nodes(), self.num_classes))
        sampler = NeighborSampler(g=graph,
                                  batch_size=self.params.batch_size,
                                  expand_factor=self.total_cell + self.num_genes,
                                  num_hops=1,
                                  neighbor_type='in',
                                  shuffle=False,
                                  num_workers=8,
                                  seed_nodes=self.test_dict['nid'][num])
        with torch.no_grad():
            for nf in sampler:
                nf.copy_from_parent()  # Copy node/edge features from the parent graph.
                logits = self.model(nf).cpu()
                # Scatter the batch output back to its parent-graph node ids.
                batch_nids = nf.layer_parent_nid(-1).type(torch.long)
                node_logits[batch_nids] = logits
        masked_logits = node_logits[self.test_dict['mask'][num]]
        return nn.functional.softmax(masked_logits, dim=1).numpy()

    def inference(self, num):
        """Predict a label for every masked node of dataset ``num``.

        A node whose top probability is below
        ``params.unsure_rate / num_classes`` is reported as ``'unsure'``.

        Returns:
            list of predicted labels (entries of ``id2label`` or ``'unsure'``).
        """
        threshold = self.params.unsure_rate / self.num_classes
        predict_label = []
        for probs in self._predict_probabilities(num):
            if probs.max().item() < threshold:
                predict_label.append('unsure')
            else:
                predict_label.append(self.id2label[probs.argmax().item()])
        return predict_label

    def evaluate_test(self, num):
        """Predict labels for dataset ``num`` and score them against the truth.

        A confident prediction is correct when it is contained in
        ``map_dict[num][true_label]``; ``'unsure'`` predictions are never
        counted as correct.

        Returns:
            tuple ``(correct, total, unsure_num, accuracy, predict_label)``.
            ``accuracy`` is ``correct / total``, or ``0.0`` when the dataset
            has no masked nodes (previously a ``ZeroDivisionError``).
        """
        all_probs = self._predict_probabilities(num)
        total = all_probs.shape[0]
        threshold = self.params.unsure_rate / self.num_classes
        unsure_num, correct = 0, 0
        predict_label = []
        for probs, t_label in zip(all_probs, self.test_dict['label'][num]):
            if probs.max().item() < threshold:
                unsure_num += 1
                predict_label.append('unsure')
                continue
            pred_label = self.id2label[probs.argmax().item()]
            if pred_label in self.map_dict[num][t_label]:
                correct += 1
            predict_label.append(pred_label)
        accuracy = correct / total if total else 0.0
        return correct, total, unsure_num, accuracy, predict_label

    def _label_map_path(self):
        """Locate ``map/celltype2subtype.xlsx``.

        The path relative to the current working directory is used when it
        exists (the original behaviour); otherwise the copy next to this
        file is used, so the script also works when launched from another
        directory, like the model and output paths do.
        """
        cwd_relative = Path('./map/celltype2subtype.xlsx')
        if cwd_relative.exists():
            return cwd_relative
        return self.prj_path / 'map' / 'celltype2subtype.xlsx'

    def save_pred(self, num, pred):
        """Write the predictions for dataset ``num`` to a CSV file.

        Predicted labels are translated through the species sheet of the
        cell-type map (``old_type`` -> ``new_type`` / ``new_subtype``);
        labels absent from the map, such as ``'unsure'``, are written
        unchanged. In evaluation mode the ground-truth label is included.

        Args:
            num: Dataset id, used to index ``test_dict`` and name the file.
            pred: Predicted labels, aligned with ``test_dict['origin_id'][num]``.
        """
        label_map = pd.read_excel(
            self._label_map_path(),
            sheet_name=self.params.species,
            header=0,
            names=['species', 'old_type', 'new_type', 'new_subtype'])
        label_map = label_map.fillna('N/A', inplace=False)
        # Later rows win on duplicate ``old_type`` values, as before.
        oldtype2newtype = dict(zip(label_map['old_type'], label_map['new_type']))
        oldtype2newsubtype = dict(zip(label_map['old_type'], label_map['new_subtype']))

        save_path = self.prj_path / self.params.save_dir
        # Tolerates nested ``save_dir`` values and a directory created
        # concurrently between an existence check and the mkdir call.
        save_path.mkdir(parents=True, exist_ok=True)

        # Insertion order fixes the CSV column order.
        columns = {'index': self.test_dict['origin_id'][num]}
        if self.params.evaluate:
            columns['original label'] = self.test_dict['label'][num]
        columns['cell_type'] = [oldtype2newtype.get(p, p) for p in pred]
        columns['cell_subtype'] = [oldtype2newsubtype.get(p, p) for p in pred]
        df = pd.DataFrame(columns)

        df.to_csv(save_path / (self.params.species + f"_{self.params.tissue}_{num}.csv"),
                  index=False)
        print(
            f"output has been stored in {self.params.species}_{self.params.tissue}_{num}.csv"
        )
features = np.eye(n) # Generates node features # Yields indices to split data into training and test sets idx = np.random.RandomState(seed=42).permutation(n) # Transforms the numpy matrices/vectors to torch tensors features = torch.FloatTensor(features) features = features.to(device) y_train = torch.LongTensor(y_train) y_train = y_train.to(device) adj = torch.FloatTensor(adj) adj = adj.to(device) # Creates the model and specifies the optimizer model = GNN(features.shape[1], n_hidden_1, n_hidden_2, n_class, dropout_rate) model = model.to(device) optimizer = optim.Adam(model.parameters(), lr=learning_rate) train_test_cut = len(x_train) // 10 * 7 def train(epoch): t = time.time() model.train() optimizer.zero_grad() output = model(features, adj) output = output[0] loss_train = F.nll_loss(output[:train_test_cut], y_train[:train_test_cut]) acc_train = accuracy(output[:train_test_cut], y_train[:train_test_cut]) # loss_train = F.nll_loss(output[x_train], y_train) # acc_train = accuracy(output[x_train], y_train) loss_train.backward()