def use(self, smiles: list, model_filename=None) -> list:
    """Run the trained model on new SMILES strings.

    Args:
        smiles (list): SMILES strings to predict for
        model_filename (str, optional): if supplied, load the model from
            this file instead of using the in-session model

    Returns:
        list: one prediction array per input SMILES string

    Raises:
        RuntimeError: if no model was trained in-session and none supplied
    """
    # Figure out what to use
    if self._model is None and model_filename is None:
        raise RuntimeError(
            'Model not previously built, or model not supplied')
    if model_filename is not None:
        self._model = torch.load(model_filename)
    self._model.eval()
    # Prepare data: encode each compound into node/edge features
    data = []
    for smi in smiles:
        a, b = self._ce.encode(smi)
        data.append(
            gdata.Data(x=a,
                       edge_index=self._ce.connectivity(smi),
                       edge_attr=b).to(self._config['device']))
    loader_test = gdata.DataLoader(data, batch_size=1, shuffle=False)
    # Get results; move tensors to host first — .numpy() raises on CUDA
    # tensors, and self._config['device'] may be a GPU.
    results = []
    for batch in loader_test:
        _, res = self._model(batch)
        results.append(res.detach().cpu().numpy()[0])
    return results
def valid(valid_data, model, batch_size):
    """Evaluate *model* on *valid_data*.

    Returns:
        tuple: (predictions, ground_truth) as flat numpy arrays.
    """
    loader = gd.DataLoader(valid_data, batch_size, shuffle=False)
    model.eval()
    predictions, targets = [], []
    for batch in loader:
        # Record labels before the batch is moved to the GPU.
        targets.append(batch.y)
        batch = batch.to('cuda')
        predictions.append(model(batch).detach().cpu())
    flat_pred = torch.cat(predictions, dim=0).view(-1).numpy()
    flat_gt = torch.cat(targets, dim=0).view(-1).numpy()
    return flat_pred, flat_gt
def train_node_classifier(model_name, dataset, **model_kwargs):
    """Train (or load a pretrained) NodeLevelGNN on *dataset*.

    Args:
        model_name (str): name used to select the checkpoint file and tag
            the model ("MLP" selects the MLP variant inside NodeLevelGNN)
        dataset: torch-geometric dataset; also used as train/val/test
            loader (masks inside the single graph select the splits)
        **model_kwargs: forwarded to the NodeLevelGNN constructor

    Returns:
        tuple: (model, result dict with "train"/"val"/"test" accuracies)
    """
    pl.seed_everything(42)
    node_data_loader = geom_data.DataLoader(dataset, batch_size=1)

    # Create a PyTorch Lightning trainer.
    root_dir = os.path.join(CHECKPOINT_PATH, "NodeLevel" + model_name)
    os.makedirs(root_dir, exist_ok=True)
    trainer = pl.Trainer(
        default_root_dir=root_dir,
        callbacks=[
            ModelCheckpoint(save_weights_only=True,
                            mode="max",
                            monitor="val_acc")
        ],
        gpus=AVAIL_GPUS,
        max_epochs=200,
        progress_bar_refresh_rate=0,
    )  # 0 because epoch size is 1.
    trainer.logger._default_hp_metric = None  # Optional logging argument that we don't need.

    # Check whether pretrained model exists. If yes, load it and skip training.
    pretrained_filename = os.path.join(CHECKPOINT_PATH,
                                       "NodeLevel%s.ckpt" % model_name)
    if os.path.isfile(pretrained_filename):
        print("Found pretrained model, loading...")
        model = NodeLevelGNN.load_from_checkpoint(pretrained_filename)
    else:
        # Was seed_everything() with no argument, which draws a *random*
        # seed and silently breaks the reproducibility established above.
        pl.seed_everything(42)
        model = NodeLevelGNN(model_name=model_name,
                             c_in=dataset.num_node_features,
                             c_out=dataset.num_classes,
                             **model_kwargs)
        trainer.fit(model, node_data_loader, node_data_loader)
        model = NodeLevelGNN.load_from_checkpoint(
            trainer.checkpoint_callback.best_model_path)

    # Test best model on the test set.
    test_result = trainer.test(model,
                               test_dataloaders=node_data_loader,
                               verbose=False)
    batch = next(iter(node_data_loader))
    batch = batch.to(model.device)
    _, train_acc = model.forward(batch, mode="train")
    _, val_acc = model.forward(batch, mode="val")
    result = {
        "train": train_acc,
        "val": val_acc,
        "test": test_result[0]["test_acc"]
    }
    return model, result
def visualize_all_rank(model, valid_data, fn, batch_size):
    """Plot predicted-vs-ground-truth ranks for *valid_data* and save to *fn*.

    Args:
        model: trained model to evaluate (moved batches are sent to CUDA)
        valid_data: dataset accepted by the DataLoader
        fn (str): output path for the saved figure
        batch_size (int): evaluation batch size
    """
    # Reuse valid() instead of duplicating its evaluation loop verbatim.
    pred, gt = valid(valid_data, model, batch_size)
    fig = plot_rank(pred, gt)
    fig.savefig(fn)
def get_dataloader(graph, X, y, batch_size=1, undirected=True, shuffle=True):
    """
    Converts a graph and a dataset to a dataloader.

    Parameters:
    ----------
    graph : igraph object
        The underlying graph to be fed to the graph neural networks.

    X : numpy ndarray
        Input dataset with columns as features and rows as observations.

    y : numpy ndarray
        Class labels.

    batch_size: int, default=1
        The batch size.

    undirected: boolean
        If True, a reversed copy of every edge is added so that message
        passing runs in both directions. (Duplicate edges produced when
        the adjacency matrix is already symmetric are merged by
        ``Data.coalesce()`` below.)

    Returns:
    --------
    dataloader : a pytorch-geometric dataloader. All of the graphs will have
        the same connectivity (given by the input graph), but the node
        features will be the features from X.
    """
    n_obs, n_features = X.shape
    rows, cols = np.where(graph == 1)
    edges = zip(rows.tolist(), cols.tolist())
    sources = []
    targets = []
    for edge in edges:
        sources.append(edge[0])
        targets.append(edge[1])
        if undirected:
            # BUG FIX: previously the *same* (source, target) pair was
            # appended a second time, so no reverse edge was ever created.
            # Append the reversed edge instead.
            sources.append(edge[1])
            targets.append(edge[0])
    edge_index = torch.tensor([sources, targets], dtype=torch.long)
    list_graphs = []
    y = y.tolist()
    for i in range(n_obs):
        y_tensor = torch.tensor(y[i])
        X_tensor = torch.tensor(X[i, :]).view(X.shape[1], 1).float()
        data = geo_dt.Data(x=X_tensor, edge_index=edge_index, y=y_tensor)
        # coalesce() sorts edge_index and removes duplicate edges.
        list_graphs.append(data.coalesce())
    dataloader = geo_dt.DataLoader(list_graphs,
                                   batch_size=batch_size,
                                   shuffle=shuffle)
    return dataloader
def use(self, smiles: List[str],
        model_filename: str = None) -> List[List[float]]:
    """ Uses a pre-trained CompoundGCN, either trained in-session or recalled
    from a file, for use on new data

    Args:
        smiles (list[str]): SMILES strings to predict for
        model_filename (str, optional): filename/path of model to load,
            default = None (model trained in-session used)

    Returns:
        list[list[float]]: predicted values of shape [n_samples, n_targets]

    Raises:
        RuntimeError: if no model was trained in-session and none supplied
    """
    # Figure out what to use
    if self._model is None and model_filename is None:
        raise RuntimeError(
            'Model not previously built, or model not supplied')
    if model_filename is not None:
        self._model = torch.load(model_filename)
    self._model.eval()
    # Prepare data
    data = []
    for smi in smiles:
        a, b, c = self._ce.encode(smi)
        data.append(
            gdata.Data(x=a, edge_index=c, edge_attr=b).to(self._device))
    loader_test = gdata.DataLoader(data, batch_size=1, shuffle=False)
    # Get results; bring the tensor to host memory first — .numpy() raises
    # on CUDA tensors, and self._device may be a GPU.
    results = []
    for batch in loader_test:
        res, _, _ = self._model(batch)
        results.append(res.detach().cpu().numpy().tolist()[0])
    return results
experiment.add_tags(tags=[ f'graph_model={graph_model}', f'graph_attention_pooling={graph_attention_pooling}', ]) # Everything in the experiment will be put inside this try statement # If exception happens, clean things up and move to the next experiment try: # Dataloaders dataloader_kwargs = { 'pin_memory': True, 'batch_size': batch_size, 'num_workers': num_workers, } trn_loader = pyg_data.DataLoader(trn_dset, shuffle=True, **dataloader_kwargs) tst_loader = pyg_data.DataLoader(tst_dset, **dataloader_kwargs) # Construct graph model, might run into CUDA memory error graph_model_kwargs = { 'node_attr_dim': node_attr_dim, 'edge_attr_dim': edge_attr_dim, 'state_dim': graph_state_dim, 'num_conv': graph_num_conv, 'out_dim': graph_out_dim, 'attention_pooling': graph_attention_pooling, } if graph_model == 'gcn': drug_tower = EdgeGCNEncoder(**graph_model_kwargs)
def make_GeometricDataloader(dataset, batch_size_=4, shuffle_=True):
    """Wrap *dataset* in a torch-geometric DataLoader."""
    loader = geodata.DataLoader(dataset,
                                batch_size=batch_size_,
                                shuffle=shuffle_)
    return loader
def train(self,
          smiles: List[str],
          target: List[List[float]],
          model_config: dict = None,
          valid_size: float = 0.2,
          valid_epoch_iter: int = 1,
          valid_patience: int = 16,
          batch_size: int = 1,
          lr: float = 0.001,
          lr_decay: float = 0.0,
          epochs: int = 128,
          verbose: int = 0,
          random_state: int = None,
          shuffle: bool = False,
          **kwargs) -> Tuple[List[float], List[float]]:
    """ Trains a CompoundGCN using supplied SMILES strings, target values

    Args:
        smiles (list[str]): list of SMILES strings, one per compound
        target (list[list[float]]): list of target values, shape
            [n_samples, n_targets], one per compound
        model_config (dict, optional): if not supplied, uses default model
            architecture: {'n_messages': 1, 'n_hidden': 1,
            'hidden_dim': 32, 'dropout': 0.00}
        valid_size (float, optional): proportion of training set used for
            periodic validation, default = 0.2
        valid_epoch_iter (int, optional): validation set performance is
            measured every `this` epochs, default = 1 epochs
        valid_patience (int, optional): if lower validation set loss not
            encountered after `this` many epochs, terminate to avoid
            overfitting, default = 16
        batch_size (int, optional): size of each batch during training,
            default = 1
        lr (float, optional): learning rate for Adam opt, default = 0.001
        lr_decay (float, optional): linear rate of decay of learning rate
            per epoch, default = 0.0
        epochs (int, optional): number of training epochs, default = 128
        verbose (int, optional): training and validation loss printed to
            console every `this` epochs, default = 0 (no printing)
        random_state (int, optional): if not `None`, seeds validation
            subset randomized selection with this value
        shuffle (bool, optional): if True, shuffles training and validation
            subsets between training epochs, default = False
        **kwargs: additional arguments passed to torch.optim.Adam

    Returns:
        tuple[list[float], list[float]]: (training losses, validation
        losses) over all training epochs

    Raises:
        ValueError: if `smiles` and `target` differ in length
    """
    # Check for inequality in length of input, target data
    if len(smiles) != len(target):
        raise ValueError(
            'Supplied SMILES and targets not the same length: {}, {}'.
            format(len(smiles), len(target)))
    # Prepare data: encode every compound into a torch-geometric Data
    # object (node features, connectivity, edge features, target row)
    self._ce = CompoundEncoder(smiles)
    data = []
    for idx, smi in enumerate(smiles):
        a, b, c = self._ce.encode(smi)
        data.append(
            gdata.Data(x=a,
                       edge_index=c,
                       edge_attr=b,
                       y=torch.tensor(target[idx]).type(
                           torch.float).reshape(
                               1, len(target[idx]))).to(self._device))
    # Split data into training, validation subsets
    data_train, data_valid = train_test_split(data,
                                              test_size=valid_size,
                                              random_state=random_state)
    loader_train = gdata.DataLoader(data_train,
                                    batch_size=batch_size,
                                    shuffle=True)
    loader_valid = gdata.DataLoader(data_valid,
                                    batch_size=batch_size,
                                    shuffle=True)
    # Create model (default architecture unless a config dict is given)
    if model_config is None:
        self._model = CompoundGCN(self._ce.ATOM_DIM, self._ce.BOND_DIM,
                                  len(target[0]))
    else:
        self._model = CompoundGCN(self._ce.ATOM_DIM, self._ce.BOND_DIM,
                                  len(target[0]),
                                  model_config['n_messages'],
                                  model_config['n_hidden'],
                                  model_config['hidden_dim'],
                                  model_config['dropout'])
    self._model.to(self._device)
    optimizer = torch.optim.Adam(self._model.parameters(), lr=lr, **kwargs)
    # Setup callbacks: linear LR decay + early-stopping validator
    CBO = CallbackOperator()
    _lrdecay = LRDecayLinear(lr, lr_decay, optimizer)
    _validator = Validator(loader_valid, self._model, valid_epoch_iter,
                           valid_patience)
    CBO.add_cb(_lrdecay)
    CBO.add_cb(_validator)
    # Record loss for return
    train_losses = []
    valid_losses = []
    # TRAIN BEGIN
    CBO.on_train_begin()
    # Begin training loop; any callback hook returning False aborts the
    # surrounding loop (this is how the Validator implements early stop)
    for epoch in range(epochs):
        # EPOCH BEGIN
        if not CBO.on_epoch_begin(epoch):
            break
        if shuffle:
            # NOTE(review): with a fixed random_state this re-split
            # reproduces the identical partition every epoch, and
            # _validator still holds the loader_valid it was constructed
            # with — the re-created one is never handed to it. Confirm
            # this is the intended behavior.
            data_train, data_valid = train_test_split(
                data, test_size=valid_size, random_state=random_state)
            loader_train = gdata.DataLoader(data_train,
                                            batch_size=batch_size,
                                            shuffle=True)
            loader_valid = gdata.DataLoader(data_valid,
                                            batch_size=batch_size,
                                            shuffle=True)
        train_loss = 0.0
        self._model.train()
        for b_idx, batch in enumerate(loader_train):
            # BATCH BEGIN
            if not CBO.on_batch_begin(b_idx):
                break
            optimizer.zero_grad()
            pred, _, _ = self._model(batch)
            target = batch.y
            # BATCH END, LOSS BEGIN
            if not CBO.on_batch_end(b_idx):
                break
            if not CBO.on_loss_begin(b_idx):
                break
            loss = self._model.loss(pred, target)
            loss.backward()
            # LOSS END, STEP BEGIN
            if not CBO.on_loss_end(b_idx):
                break
            if not CBO.on_step_begin(b_idx):
                break
            optimizer.step()
            # Weight the batch loss by graph count for a dataset-level mean
            train_loss += loss.detach().item() * batch.num_graphs
            # STEP END
            if not CBO.on_step_end(b_idx):
                break
        train_loss /= len(loader_train.dataset)
        # EPOCH END
        if not CBO.on_epoch_end(epoch):
            break
        if verbose > 0:
            if epoch % verbose == 0:
                print('Epoch: {} | Train Loss: {} | Valid Loss: {}'.format(
                    epoch, train_loss, _validator._most_recent_loss))
        train_losses.append(train_loss)
        valid_losses.append(_validator._most_recent_loss.detach().item())
    # TRAIN END
    CBO.on_train_end()
    return (train_losses, valid_losses)
def simple_gnn_tutorial():
    """Walk through a GNN tutorial end-to-end.

    Demonstrates hand-written GCN and GAT layers on a toy graph, then
    trains node-level classifiers (MLP and GCN) on Cora and a graph-level
    classifier (GraphConv) on MUTAG using PyTorch Lightning, downloading
    pretrained checkpoints when available.
    """
    AVAIL_GPUS = min(1, torch.cuda.device_count())
    BATCH_SIZE = 256 if AVAIL_GPUS else 64
    # Path to the folder where the datasets are/should be downloaded.
    DATASET_PATH = os.environ.get("PATH_DATASETS", "data/")
    # Path to the folder where the pretrained models are saved.
    CHECKPOINT_PATH = os.environ.get("PATH_CHECKPOINT", "saved_models/GNNs/")
    # Setting the seed.
    pl.seed_everything(42)
    # Ensure that all operations are deterministic on GPU (if used) for reproducibility.
    # NOTE(review): "determinstic" is a typo — the real attribute is
    # torch.backends.cudnn.deterministic, so this assignment has no effect.
    torch.backends.cudnn.determinstic = True
    torch.backends.cudnn.benchmark = False
    # Github URL where saved models are stored for this tutorial.
    base_url = "https://raw.githubusercontent.com/phlippe/saved_models/main/tutorial7/"
    # Files to download.
    pretrained_files = [
        "NodeLevelMLP.ckpt", "NodeLevelGNN.ckpt", "GraphLevelGraphConv.ckpt"
    ]
    # Create checkpoint path if it doesn't exist yet.
    os.makedirs(CHECKPOINT_PATH, exist_ok=True)

    # For each file, check whether it already exists. If not, try downloading it.
    for file_name in pretrained_files:
        file_path = os.path.join(CHECKPOINT_PATH, file_name)
        if "/" in file_name:
            os.makedirs(file_path.rsplit("/", 1)[0], exist_ok=True)
        if not os.path.isfile(file_path):
            file_url = base_url + file_name
            print("Downloading %s..." % file_url)
            try:
                urllib.request.urlretrieve(file_url, file_path)
            except HTTPError as e:
                print(
                    "Something went wrong. Please try to download the file from the GDrive folder,"
                    " or contact the author with the full output including the following error:\n",
                    e,
                )

    #--------------------
    # Graph convolutions.
    class GCNLayer(nn.Module):
        # Minimal dense GCN layer: linear projection followed by
        # neighborhood averaging via the adjacency matrix.
        def __init__(self, c_in, c_out):
            super().__init__()
            self.projection = nn.Linear(c_in, c_out)

        def forward(self, node_feats, adj_matrix):
            """
            Args:
                node_feats: Tensor with node features of shape
                    [batch_size, num_nodes, c_in]
                adj_matrix: Batch of adjacency matrices of the graph. If
                    there is an edge from i to j, adj_matrix[b,i,j]=1 else 0.
                    Supports directed edges by non-symmetric matrices.
                    Assumes to already have added the identity connections.
                    Shape: [batch_size, num_nodes, num_nodes]
            """
            # Num neighbours = number of incoming edges.
            num_neighbours = adj_matrix.sum(dim=-1, keepdims=True)
            node_feats = self.projection(node_feats)
            node_feats = torch.bmm(adj_matrix, node_feats)
            node_feats = node_feats / num_neighbours
            return node_feats

    # Tiny 4-node example to show what one GCN layer computes.
    node_feats = torch.arange(8, dtype=torch.float32).view(1, 4, 2)
    adj_matrix = torch.Tensor([[[1, 1, 0, 0], [1, 1, 1, 1], [0, 1, 1, 1],
                                [0, 1, 1, 1]]])
    print("Node features:\n", node_feats)
    print("\nAdjacency matrix:\n", adj_matrix)
    layer = GCNLayer(c_in=2, c_out=2)
    # Identity weights / zero bias so the output is a pure neighbor average.
    layer.projection.weight.data = torch.Tensor([[1.0, 0.0], [0.0, 1.0]])
    layer.projection.bias.data = torch.Tensor([0.0, 0.0])
    with torch.no_grad():
        out_feats = layer(node_feats, adj_matrix)
    print("Adjacency matrix", adj_matrix)
    print("Input features", node_feats)
    print("Output features", out_feats)

    #--------------------
    # Graph attention.
    class GATLayer(nn.Module):
        def __init__(self, c_in, c_out, num_heads=1, concat_heads=True,
                     alpha=0.2):
            """
            Args:
                c_in: Dimensionality of input features
                c_out: Dimensionality of output features
                num_heads: Number of heads, i.e. attention mechanisms to
                    apply in parallel. The output features are equally split
                    up over the heads if concat_heads=True.
                concat_heads: If True, the output of the different heads is
                    concatenated instead of averaged.
                alpha: Negative slope of the LeakyReLU activation.
            """
            super().__init__()
            self.num_heads = num_heads
            self.concat_heads = concat_heads
            if self.concat_heads:
                assert c_out % num_heads == 0, "Number of output features must be a multiple of the count of heads."
                c_out = c_out // num_heads

            # Sub-modules and parameters needed in the layer.
            self.projection = nn.Linear(c_in, c_out * num_heads)
            self.a = nn.Parameter(torch.Tensor(num_heads, 2 * c_out))  # One per head.
            self.leakyrelu = nn.LeakyReLU(alpha)

            # Initialization from the original implementation.
            nn.init.xavier_uniform_(self.projection.weight.data, gain=1.414)
            nn.init.xavier_uniform_(self.a.data, gain=1.414)

        def forward(self, node_feats, adj_matrix, print_attn_probs=False):
            """
            Args:
                node_feats: Input features of the node.
                    Shape: [batch_size, c_in]
                adj_matrix: Adjacency matrix including self-connections.
                    Shape: [batch_size, num_nodes, num_nodes]
                print_attn_probs: If True, the attention weights are printed
                    during the forward pass (for debugging purposes)
            """
            batch_size, num_nodes = node_feats.size(0), node_feats.size(1)

            # Apply linear layer and sort nodes by head.
            node_feats = self.projection(node_feats)
            node_feats = node_feats.view(batch_size, num_nodes,
                                         self.num_heads, -1)

            # We need to calculate the attention logits for every edge in the adjacency matrix.
            # Doing this on all possible combinations of nodes is very expensive
            # => Create a tensor of [W*h_i||W*h_j] with i and j being the indices of all edges.
            # Returns indices where the adjacency matrix is not 0 => edges.
            edges = adj_matrix.nonzero(as_tuple=False)
            node_feats_flat = node_feats.view(batch_size * num_nodes,
                                              self.num_heads, -1)
            edge_indices_row = edges[:, 0] * num_nodes + edges[:, 1]
            edge_indices_col = edges[:, 0] * num_nodes + edges[:, 2]
            a_input = torch.cat(
                [
                    torch.index_select(
                        input=node_feats_flat, index=edge_indices_row, dim=0),
                    torch.index_select(
                        input=node_feats_flat, index=edge_indices_col, dim=0),
                ],
                dim=-1,
            )  # Index select returns a tensor with node_feats_flat being indexed at the desired positions.

            # Calculate attention MLP output (independent for each head).
            attn_logits = torch.einsum("bhc,hc->bh", a_input, self.a)
            attn_logits = self.leakyrelu(attn_logits)

            # Map list of attention values back into a matrix.
            attn_matrix = attn_logits.new_zeros(
                adj_matrix.shape + (self.num_heads, )).fill_(-9e15)
            attn_matrix[adj_matrix[..., None].repeat(1, 1, 1, self.num_heads)
                        == 1] = attn_logits.reshape(-1)

            # Weighted average of attention.
            attn_probs = F.softmax(attn_matrix, dim=2)
            if print_attn_probs:
                print("Attention probs\n", attn_probs.permute(0, 3, 1, 2))
            node_feats = torch.einsum("bijh,bjhc->bihc", attn_probs,
                                      node_feats)

            # If heads should be concatenated, we can do this by reshaping. Otherwise, take mean.
            if self.concat_heads:
                node_feats = node_feats.reshape(batch_size, num_nodes, -1)
            else:
                node_feats = node_feats.mean(dim=2)

            return node_feats

    # GAT demo on the same toy graph, with hand-set weights.
    layer = GATLayer(2, 2, num_heads=2)
    layer.projection.weight.data = torch.Tensor([[1.0, 0.0], [0.0, 1.0]])
    layer.projection.bias.data = torch.Tensor([0.0, 0.0])
    layer.a.data = torch.Tensor([[-0.2, 0.3], [0.1, -0.1]])
    with torch.no_grad():
        out_feats = layer(node_feats, adj_matrix, print_attn_probs=True)
    print("Adjacency matrix", adj_matrix)
    print("Input features", node_feats)
    print("Output features", out_feats)

    #--------------------
    # PyTorch Geometric.
    gnn_layer_by_name = {
        "GCN": geom_nn.GCNConv,
        "GAT": geom_nn.GATConv,
        "GraphConv": geom_nn.GraphConv
    }

    #--------------------
    # Node-level tasks: Semi-supervised node classification.
    cora_dataset = torch_geometric.datasets.Planetoid(root=DATASET_PATH,
                                                      name="Cora")
    print(cora_dataset[0])

    class GNNModel(nn.Module):
        def __init__(
            self,
            c_in,
            c_hidden,
            c_out,
            num_layers=2,
            layer_name="GCN",
            dp_rate=0.1,
            **kwargs,
        ):
            """
            Args:
                c_in: Dimension of input features
                c_hidden: Dimension of hidden features
                c_out: Dimension of the output features. Usually number of
                    classes in classification
                num_layers: Number of "hidden" graph layers
                layer_name: String of the graph layer to use
                dp_rate: Dropout rate to apply throughout the network
                kwargs: Additional arguments for the graph layer
                    (e.g. number of heads for GAT)
            """
            super().__init__()
            gnn_layer = gnn_layer_by_name[layer_name]
            layers = []
            in_channels, out_channels = c_in, c_hidden
            for l_idx in range(num_layers - 1):
                layers += [
                    gnn_layer(in_channels=in_channels,
                              out_channels=out_channels,
                              **kwargs),
                    nn.ReLU(inplace=True),
                    nn.Dropout(dp_rate),
                ]
                in_channels = c_hidden
            layers += [
                gnn_layer(in_channels=in_channels,
                          out_channels=c_out,
                          **kwargs)
            ]
            self.layers = nn.ModuleList(layers)

        def forward(self, x, edge_index):
            """
            Args:
                x: Input features per node
                edge_index: List of vertex index pairs representing the edges
                    in the graph (PyTorch geometric notation)
            """
            for layer in self.layers:
                # For graph layers, we need to add the "edge_index" tensor as additional input
                # All PyTorch Geometric graph layer inherit the class "MessagePassing", hence
                # we can simply check the class type.
                if isinstance(layer, geom_nn.MessagePassing):
                    x = layer(x, edge_index)
                else:
                    x = layer(x)
            return x

    class MLPModel(nn.Module):
        def __init__(self, c_in, c_hidden, c_out, num_layers=2, dp_rate=0.1):
            """
            Args:
                c_in: Dimension of input features
                c_hidden: Dimension of hidden features
                c_out: Dimension of the output features. Usually number of
                    classes in classification
                num_layers: Number of hidden layers
                dp_rate: Dropout rate to apply throughout the network
            """
            super().__init__()
            layers = []
            in_channels, out_channels = c_in, c_hidden
            for l_idx in range(num_layers - 1):
                layers += [
                    nn.Linear(in_channels, out_channels),
                    nn.ReLU(inplace=True),
                    nn.Dropout(dp_rate)
                ]
                in_channels = c_hidden
            layers += [nn.Linear(in_channels, c_out)]
            self.layers = nn.Sequential(*layers)

        def forward(self, x, *args, **kwargs):
            """
            Args:
                x: Input features per node
            """
            return self.layers(x)

    class NodeLevelGNN(pl.LightningModule):
        def __init__(self, model_name, **model_kwargs):
            super().__init__()
            # Saving hyperparameters.
            self.save_hyperparameters()
            if model_name == "MLP":
                self.model = MLPModel(**model_kwargs)
            else:
                self.model = GNNModel(**model_kwargs)
            self.loss_module = nn.CrossEntropyLoss()

        def forward(self, data, mode="train"):
            x, edge_index = data.x, data.edge_index
            x = self.model(x, edge_index)

            # Only calculate the loss on the nodes corresponding to the mask.
            if mode == "train":
                mask = data.train_mask
            elif mode == "val":
                mask = data.val_mask
            elif mode == "test":
                mask = data.test_mask
            else:
                assert False, "Unknown forward mode: %s" % mode

            loss = self.loss_module(x[mask], data.y[mask])
            acc = (x[mask].argmax(dim=-1) ==
                   data.y[mask]).sum().float() / mask.sum()
            return loss, acc

        def configure_optimizers(self):
            # We use SGD here, but Adam works as well.
            optimizer = optim.SGD(self.parameters(),
                                  lr=0.1,
                                  momentum=0.9,
                                  weight_decay=2e-3)
            return optimizer

        def training_step(self, batch, batch_idx):
            loss, acc = self.forward(batch, mode="train")
            self.log("train_loss", loss)
            self.log("train_acc", acc)
            return loss

        def validation_step(self, batch, batch_idx):
            _, acc = self.forward(batch, mode="val")
            self.log("val_acc", acc)

        def test_step(self, batch, batch_idx):
            _, acc = self.forward(batch, mode="test")
            self.log("test_acc", acc)

    def train_node_classifier(model_name, dataset, **model_kwargs):
        # Train a NodeLevelGNN (or load a pretrained checkpoint) and report
        # train/val/test accuracies.
        pl.seed_everything(42)
        node_data_loader = geom_data.DataLoader(dataset, batch_size=1)

        # Create a PyTorch Lightning trainer.
        root_dir = os.path.join(CHECKPOINT_PATH, "NodeLevel" + model_name)
        os.makedirs(root_dir, exist_ok=True)
        trainer = pl.Trainer(
            default_root_dir=root_dir,
            callbacks=[
                ModelCheckpoint(save_weights_only=True,
                                mode="max",
                                monitor="val_acc")
            ],
            gpus=AVAIL_GPUS,
            max_epochs=200,
            progress_bar_refresh_rate=0,
        )  # 0 because epoch size is 1.
        trainer.logger._default_hp_metric = None  # Optional logging argument that we don't need.

        # Check whether pretrained model exists. If yes, load it and skip training.
        pretrained_filename = os.path.join(CHECKPOINT_PATH,
                                           "NodeLevel%s.ckpt" % model_name)
        if os.path.isfile(pretrained_filename):
            print("Found pretrained model, loading...")
            model = NodeLevelGNN.load_from_checkpoint(pretrained_filename)
        else:
            # NOTE(review): seed_everything() without an argument draws a
            # random seed — training here is not reproducible; confirm
            # whether 42 was intended.
            pl.seed_everything()
            model = NodeLevelGNN(model_name=model_name,
                                 c_in=dataset.num_node_features,
                                 c_out=dataset.num_classes,
                                 **model_kwargs)
            trainer.fit(model, node_data_loader, node_data_loader)
            model = NodeLevelGNN.load_from_checkpoint(
                trainer.checkpoint_callback.best_model_path)

        # Test best model on the test set.
        test_result = trainer.test(model,
                                   test_dataloaders=node_data_loader,
                                   verbose=False)
        batch = next(iter(node_data_loader))
        batch = batch.to(model.device)
        _, train_acc = model.forward(batch, mode="train")
        _, val_acc = model.forward(batch, mode="val")
        result = {
            "train": train_acc,
            "val": val_acc,
            "test": test_result[0]["test_acc"]
        }
        return model, result

    # Small function for printing the test scores.
    def print_results(result_dict):
        if "train" in result_dict:
            print("Train accuracy: %4.2f%%" % (100.0 * result_dict["train"]))
        if "val" in result_dict:
            print("Val accuracy:   %4.2f%%" % (100.0 * result_dict["val"]))
        print("Test accuracy:  %4.2f%%" % (100.0 * result_dict["test"]))

    node_mlp_model, node_mlp_result = train_node_classifier(
        model_name="MLP",
        dataset=cora_dataset,
        c_hidden=16,
        num_layers=2,
        dp_rate=0.1)
    print_results(node_mlp_result)

    node_gnn_model, node_gnn_result = train_node_classifier(
        model_name="GNN",
        layer_name="GCN",
        dataset=cora_dataset,
        c_hidden=16,
        num_layers=2,
        dp_rate=0.1)
    print_results(node_gnn_result)

    #--------------------
    # Edge-level tasks: Link prediction.

    #--------------------
    # Graph-level tasks: Graph classification.
    tu_dataset = torch_geometric.datasets.TUDataset(root=DATASET_PATH,
                                                    name="MUTAG")
    print("Data object:", tu_dataset.data)
    print("Length:", len(tu_dataset))
    print("Average label: %4.2f" % (tu_dataset.data.y.float().mean().item()))

    torch.manual_seed(42)
    # NOTE(review): Dataset.shuffle() looks like it returns a shuffled copy
    # rather than shuffling in place — the result is unused here; verify.
    tu_dataset.shuffle()
    train_dataset = tu_dataset[:150]
    test_dataset = tu_dataset[150:]

    # NOTE(review): the three loaders below are constructed twice verbatim;
    # the second set simply rebinds the same names — likely a copy/paste
    # duplicate.
    graph_train_loader = geom_data.DataLoader(train_dataset,
                                              batch_size=BATCH_SIZE,
                                              shuffle=True)
    graph_val_loader = geom_data.DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE)  # Additional loader for a larger datasets.
    graph_test_loader = geom_data.DataLoader(test_dataset,
                                             batch_size=BATCH_SIZE)
    graph_train_loader = geom_data.DataLoader(train_dataset,
                                              batch_size=BATCH_SIZE,
                                              shuffle=True)
    graph_val_loader = geom_data.DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE)  # Additional loader for a larger datasets.
    graph_test_loader = geom_data.DataLoader(test_dataset,
                                             batch_size=BATCH_SIZE)

    batch = next(iter(graph_test_loader))
    print("Batch:", batch)
    print("Labels:", batch.y[:10])
    print("Batch indices:", batch.batch[:40])

    class GraphGNNModel(nn.Module):
        def __init__(self, c_in, c_hidden, c_out, dp_rate_linear=0.5,
                     **kwargs):
            """
            Args:
                c_in: Dimension of input features
                c_hidden: Dimension of hidden features
                c_out: Dimension of output features (usually number of
                    classes)
                dp_rate_linear: Dropout rate before the linear layer (usually
                    much higher than inside the GNN)
                kwargs: Additional arguments for the GNNModel object
            """
            super().__init__()
            self.GNN = GNNModel(c_in=c_in,
                                c_hidden=c_hidden,
                                c_out=c_hidden,
                                **kwargs)  # Not our prediction output yet!
            self.head = nn.Sequential(nn.Dropout(dp_rate_linear),
                                      nn.Linear(c_hidden, c_out))

        def forward(self, x, edge_index, batch_idx):
            """
            Args:
                x: Input features per node
                edge_index: List of vertex index pairs representing the edges
                    in the graph (PyTorch geometric notation)
                batch_idx: Index of batch element for each node
            """
            x = self.GNN(x, edge_index)
            x = geom_nn.global_mean_pool(x, batch_idx)  # Average pooling.
            x = self.head(x)
            return x

    class GraphLevelGNN(pl.LightningModule):
        def __init__(self, **model_kwargs):
            super().__init__()
            # Saving hyperparameters.
            self.save_hyperparameters()
            self.model = GraphGNNModel(**model_kwargs)
            # Binary task (c_out == 1) uses BCE-with-logits, otherwise CE.
            self.loss_module = nn.BCEWithLogitsLoss(
            ) if self.hparams.c_out == 1 else nn.CrossEntropyLoss()

        def forward(self, data, mode="train"):
            x, edge_index, batch_idx = data.x, data.edge_index, data.batch
            x = self.model(x, edge_index, batch_idx)
            x = x.squeeze(dim=-1)

            if self.hparams.c_out == 1:
                preds = (x > 0).float()
                data.y = data.y.float()
            else:
                preds = x.argmax(dim=-1)
            loss = self.loss_module(x, data.y)
            acc = (preds == data.y).sum().float() / preds.shape[0]
            return loss, acc

        def configure_optimizers(self):
            # High lr because of small dataset and small model.
            optimizer = optim.AdamW(self.parameters(),
                                    lr=1e-2,
                                    weight_decay=0.0)
            return optimizer

        def training_step(self, batch, batch_idx):
            loss, acc = self.forward(batch, mode="train")
            self.log("train_loss", loss)
            self.log("train_acc", acc)
            return loss

        def validation_step(self, batch, batch_idx):
            _, acc = self.forward(batch, mode="val")
            self.log("val_acc", acc)

        def test_step(self, batch, batch_idx):
            _, acc = self.forward(batch, mode="test")
            self.log("test_acc", acc)

    def train_graph_classifier(model_name, **model_kwargs):
        # Train a GraphLevelGNN on MUTAG (or load a pretrained checkpoint)
        # and report train/test accuracies.
        pl.seed_everything(42)

        # Create a PyTorch Lightning trainer with the generation callback.
        root_dir = os.path.join(CHECKPOINT_PATH, "GraphLevel" + model_name)
        os.makedirs(root_dir, exist_ok=True)
        trainer = pl.Trainer(
            default_root_dir=root_dir,
            callbacks=[
                ModelCheckpoint(save_weights_only=True,
                                mode="max",
                                monitor="val_acc")
            ],
            gpus=AVAIL_GPUS,
            max_epochs=500,
            progress_bar_refresh_rate=0,
        )
        trainer.logger._default_hp_metric = None

        # Check whether pretrained model exists. If yes, load it and skip training.
        pretrained_filename = os.path.join(CHECKPOINT_PATH,
                                           "GraphLevel%s.ckpt" % model_name)
        if os.path.isfile(pretrained_filename):
            print("Found pretrained model, loading...")
            model = GraphLevelGNN.load_from_checkpoint(pretrained_filename)
        else:
            pl.seed_everything(42)
            model = GraphLevelGNN(
                c_in=tu_dataset.num_node_features,
                c_out=1
                if tu_dataset.num_classes == 2 else tu_dataset.num_classes,
                **model_kwargs,
            )
            trainer.fit(model, graph_train_loader, graph_val_loader)
            model = GraphLevelGNN.load_from_checkpoint(
                trainer.checkpoint_callback.best_model_path)

        # Test best model on validation and test set.
        train_result = trainer.test(model,
                                    test_dataloaders=graph_train_loader,
                                    verbose=False)
        test_result = trainer.test(model,
                                   test_dataloaders=graph_test_loader,
                                   verbose=False)
        result = {
            "test": test_result[0]["test_acc"],
            "train": train_result[0]["test_acc"]
        }
        return model, result

    model, result = train_graph_classifier(model_name="GraphConv",
                                           c_hidden=256,
                                           layer_name="GraphConv",
                                           num_layers=3,
                                           dp_rate_linear=0.5,
                                           dp_rate=0.0)
    print("Train performance: %4.2f%%" % (100.0 * result["train"]))
    print("Test performance:  %4.2f%%" % (100.0 * result["test"]))
########################################################################### # Dataset and dataloader dataset_kwargs = { 'target_list': TARGET_LIST, 'cid_smiles_dict': cid_smiles_dict, 'cid_dscrptr_dict': cid_dscrptr_dict } dataset = GraphToDscrptrDataset(cid_list=cid_list, **dataset_kwargs) dataloader_kwargs = { 'batch_size': 32, 'timeout': 1, 'pin_memory': True if use_cuda else False, 'num_workers': 2 if use_cuda else 0 } dataloader = pyg_data.DataLoader(dataset, shuffle=True, **dataloader_kwargs) model = EdgeGATEncoder(node_attr_dim=dataset.node_attr_dim, edge_attr_dim=dataset.edge_attr_dim, out_dim=len(TARGET_LIST)).to(device) model.train() data = next(iter(dataloader)) print(f'The input batch data is {data}') data = data.to(device) print(f'The output shape is {model(data).shape}')
def train(self,
          smiles: list,
          target: list,
          model_filename: str = None,
          model_config: dict = None):
    '''
    GraphOperator.train: fits a message-passing graph neural network to
    SMILES strings and their target values, driving the loop through the
    callback operator (LR decay + early-stopping validation)

    Args:
        smiles (list): list of SMILES strings (str)
        target (list): list of target values (1d, float)
        model_filename (str): if not None, saves model to this location
        model_config (dict): configuration dict; if none supplied, default
            is used

    Returns:
        None
    '''
    # Input and target lists must pair up one-to-one.
    if len(smiles) != len(target):
        raise ValueError(
            'Supplied SMILES and targets not the same length: {}, {}'.
            format(len(smiles), len(target)))

    # Encode each compound into a PyG Data object (atom features, bond
    # features, connectivity, target) on the configured device.
    self._ce = CompoundEncoder(smiles)
    data = []
    for idx, smi in enumerate(smiles):
        atom_attr, bond_attr = self._ce.encode(smi)
        data.append(
            gdata.Data(x=atom_attr,
                       edge_index=self._ce.connectivity(smi),
                       edge_attr=bond_attr,
                       y=torch.tensor(target[idx]).type(torch.float)).to(
                           self._config['device']))

    # Hold out a validation subset for the early-stopping callback.
    data_train, data_valid = train_test_split(
        data, test_size=self._config['valid_size'])
    train_loader = gdata.DataLoader(data_train,
                                    batch_size=self._config['batch_size'],
                                    shuffle=True)
    valid_loader = gdata.DataLoader(data_valid,
                                    batch_size=self._config['batch_size'],
                                    shuffle=True)

    # Build the network; output width matches the per-sample target length.
    self._model = MessagePassingNet(self._ce.ATOM_DIM,
                                    len(target[0]),
                                    task=self._config['task'],
                                    config=model_config)
    self._model.construct()
    self._model.to(self._config['device'])
    optimizer = torch.optim.Adam(self._model.parameters(),
                                 lr=self._config['learning_rate'])

    # Callbacks: linear LR decay and patience-based validation.
    callbacks = CallbackOperator()
    lr_decay_cb = LRDecayLinear(self._config['learning_rate'],
                                self._config['lr_decay'], optimizer)
    validator_cb = Validator(valid_loader, self._model,
                             self._config['valid_epoch_iter'],
                             self._config['valid_patience'])
    callbacks.add_cb(lr_decay_cb)
    callbacks.add_cb(validator_cb)

    # Any callback hook returning False aborts the stage it guards.
    callbacks.on_train_begin()
    for epoch in range(self._config['epochs']):
        if not callbacks.on_epoch_begin(epoch):
            break
        train_loss = 0.0
        self._model.train()
        for batch_idx, batch in enumerate(train_loader):
            if not callbacks.on_batch_begin(batch_idx):
                break
            optimizer.zero_grad()
            embedding, pred = self._model(batch)
            # NOTE: re-binds the `target` parameter name to the batch's
            # labels from here on (original behavior, preserved).
            target = batch.y
            if self._config['task'] == 'node':
                pred = pred[batch.train_mask]
                target = target[batch.train_mask]
            if not callbacks.on_batch_end(batch_idx):
                break
            if not callbacks.on_loss_begin(batch_idx):
                break
            loss = self._model.loss(pred, target)
            loss.backward()
            if not callbacks.on_loss_end(batch_idx):
                break
            if not callbacks.on_step_begin(batch_idx):
                break
            optimizer.step()
            # Weight by graphs per batch so the epoch average is per-sample.
            train_loss += loss.detach().item() * batch.num_graphs
            if not callbacks.on_step_end(batch_idx):
                break
        train_loss /= len(train_loader.dataset)
        if not callbacks.on_epoch_end(epoch):
            break
        if self._config['verbose']:
            print('Epoch: {} | Train Loss: {} | Valid Loss: {}'.format(
                epoch, train_loss, validator_cb._best_loss))
    callbacks.on_train_end()

    if model_filename is not None:
        torch.save(self._model, model_filename)
def main():
    """Train a graph model (MPNN / GCN / GAT) to predict Dragon7 descriptors.

    Loads CID->SMILES and CID->descriptor tables, normalizes the targets,
    splits into train/val/test, trains with RMSprop + plateau LR decay, and
    reports R2/MAE per descriptor.
    """

    def _int_or_float(value):
        # argparse converter: accept an absolute count (int) or a fraction
        # (float) for train_test_split sizes.  The original
        # ``type=int or float`` evaluated to plain ``int`` and silently
        # rejected fractional sizes.
        try:
            return int(value)
        except ValueError:
            return float(value)

    parser = argparse.ArgumentParser(
        description='Graph Model for Dragon7 Descriptor Prediction')
    parser.add_argument('--model_type', type=str, default='mpnn',
                        help='type of convolutional graph model',
                        choices=['mpnn', 'gcn', 'gat'])
    parser.add_argument('--pooling', type=str, default='set2set',
                        help='global pooling layer for graph model',
                        choices=['set2set', 'attention'])
    parser.add_argument('--state_dim', type=int, default=256,
                        help='hidden state dimension for conv layers')
    parser.add_argument('--num_conv', type=int, default=3,
                        help='number of convolution operations')
    parser.add_argument('--num_dscrptr', type=int, default=100,
                        help='number of dragon7 descriptors for prediction')
    parser.add_argument('--init_lr', type=float, default=5e-4)
    parser.add_argument('--weight_decay', type=float, default=1e-5,
                        help='L2 regularization for nn weights')
    parser.add_argument('--lr_decay_patience', type=int, default=8,
                        help='decay patience for learning rate')
    parser.add_argument('--lr_decay_factor', type=float, default=0.5,
                        help='decay factor for learning rate')
    parser.add_argument('--max_num_epochs', type=int, default=100,
                        help='maximum number of epochs')
    parser.add_argument('--val_size', type=_int_or_float, default=10000)
    parser.add_argument('--tst_size', type=_int_or_float, default=10000)
    parser.add_argument('--no_cuda', action='store_true',
                        help='disables CUDA training')
    parser.add_argument('--cuda_device', type=int, default=0,
                        help='CUDA device ID')
    parser.add_argument('--rand_state', type=int, default=0,
                        help='random state of numpy/sklearn/pytorch')
    args = parser.parse_args()
    print('Training Arguments:\n' + json.dumps(vars(args), indent=4))

    # Constants and initializations ###########################################
    use_cuda = torch.cuda.is_available() and (not args.no_cuda)
    # FIX: 'cuda: 0' (with a space) is an invalid device string for
    # torch.device; the correct form is 'cuda:0'.
    device = torch.device(f'cuda:{args.cuda_device}' if use_cuda else 'cpu')
    print(f'Training on device {device}')

    seed_random_state(args.rand_state)

    target_list = c.TARGET_D7_DSCRPTR_NAMES[:args.num_dscrptr]

    # Get the trn/val/tst dataset and dataloaders #############################
    print('Preparing CID-SMILES dictionary ... ')
    cid_smiles_df = pd.read_csv(c.PCBA_CID_SMILES_CSV_PATH,
                                sep='\t',
                                header=0,
                                index_col=0,
                                dtype=str)
    cid_smiles_df.index = cid_smiles_df.index.map(str)
    cid_smiles_dict = cid_smiles_df.to_dict()['SMILES']
    del cid_smiles_df

    print('Preparing CID-dscrptr dictionary ... ')
    # Stream the (large) descriptor table in chunks to bound peak memory.
    cid_list = []
    dscrptr_array = np.array([], dtype=np.float32).reshape(0, len(target_list))
    for chunk_cid_dscrptr_df in pd.read_csv(
            c.PCBA_CID_TARGET_D7DSCPTR_CSV_PATH,
            sep='\t',
            header=0,
            index_col=0,
            usecols=['CID'] + target_list,
            dtype={'CID': str, **{t: np.float32 for t in target_list}},
            chunksize=2**16):
        chunk_cid_dscrptr_df.index = chunk_cid_dscrptr_df.index.map(str)
        cid_list.extend(list(chunk_cid_dscrptr_df.index))
        dscrptr_array = np.vstack((dscrptr_array, chunk_cid_dscrptr_df.values))

    # Perform STD normalization for multi-target regression; the stats are
    # also used to un-normalize predictions in test() below.
    dscrptr_mean = np.mean(dscrptr_array, axis=0)
    dscrptr_std = np.std(dscrptr_array, axis=0)
    dscrptr_array = (dscrptr_array - dscrptr_mean) / dscrptr_std

    assert len(cid_list) == len(dscrptr_array)
    cid_dscrptr_dict = dict(zip(cid_list, dscrptr_array))

    print('Preparing datasets and dataloaders ... ')
    # Keep only CIDs present in BOTH tables, sorted numerically.
    smiles_cid_set = set(cid_smiles_dict.keys())
    dscrptr_cid_set = set(cid_dscrptr_dict.keys())
    cid_list = sorted(smiles_cid_set & dscrptr_cid_set, key=int)

    trn_cid_list, tst_cid_list = \
        train_test_split(cid_list,
                         test_size=args.tst_size,
                         random_state=args.rand_state)
    trn_cid_list, val_cid_list = \
        train_test_split(trn_cid_list,
                         test_size=args.val_size,
                         random_state=args.rand_state)

    # Datasets and dataloaders
    dataset_kwargs = {
        'target_list': target_list,
        'cid_smiles_dict': cid_smiles_dict,
        'cid_dscrptr_dict': cid_dscrptr_dict,
    }
    trn_dataset = GraphToDscrptrDataset(cid_list=trn_cid_list,
                                        **dataset_kwargs)
    val_dataset = GraphToDscrptrDataset(cid_list=val_cid_list,
                                        **dataset_kwargs)
    tst_dataset = GraphToDscrptrDataset(cid_list=tst_cid_list,
                                        **dataset_kwargs)

    dataloader_kwargs = {
        'batch_size': 32,
        'timeout': 1,
        'pin_memory': True if use_cuda else False,
        'num_workers': 4 if use_cuda else 0,
    }
    trn_loader = pyg_data.DataLoader(trn_dataset,
                                     shuffle=True,
                                     **dataloader_kwargs)
    val_loader = pyg_data.DataLoader(val_dataset, **dataloader_kwargs)
    tst_loader = pyg_data.DataLoader(tst_dataset, **dataloader_kwargs)

    # Model, optimizer, and scheduler #########################################
    attention_pooling = (args.pooling == 'attention')
    if args.model_type.upper() == 'GCN':
        model = EdgeGCNEncoder(node_attr_dim=trn_dataset.node_attr_dim,
                               edge_attr_dim=trn_dataset.edge_attr_dim,
                               state_dim=args.state_dim,
                               num_conv=args.num_conv,
                               out_dim=len(target_list),
                               attention_pooling=attention_pooling).to(device)
    elif args.model_type.upper() == 'GAT':
        model = EdgeGATEncoder(node_attr_dim=trn_dataset.node_attr_dim,
                               edge_attr_dim=trn_dataset.edge_attr_dim,
                               state_dim=args.state_dim,
                               num_conv=args.num_conv,
                               out_dim=len(target_list),
                               attention_pooling=attention_pooling).to(device)
    else:
        model = MPNN(node_attr_dim=trn_dataset.node_attr_dim,
                     edge_attr_dim=trn_dataset.edge_attr_dim,
                     state_dim=args.state_dim,
                     num_conv=args.num_conv,
                     out_dim=len(target_list),
                     attention_pooling=attention_pooling).to(device)

    num_params = count_parameters(model)
    print(f'Model Summary (Number of Parameters: {num_params})\n{model}')

    # FIX: --weight_decay was parsed but never applied; wire it into the
    # optimizer as advertised by its help text.
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=args.init_lr,
                                    weight_decay=args.weight_decay)
    # FIX: the scheduler is stepped with validation R2 (higher is better),
    # so it must run in 'max' mode; the default 'min' would decay the LR
    # precisely while the metric is improving.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='max',
        factor=args.lr_decay_factor,
        patience=args.lr_decay_patience,
        min_lr=1e-6)

    def train(loader):
        # One epoch of MSE training; returns mean per-graph loss.
        model.train()
        loss_all = 0
        for data in loader:
            data = data.to(device)
            optimizer.zero_grad()
            loss = F.mse_loss(model(data), data.y.view(-1, len(target_list)))
            loss.backward()
            loss_all += loss.item() * data.num_graphs
            optimizer.step()
        # FIX: normalize by the dataset actually iterated, not trn_loader's.
        return loss_all / len(loader.dataset)

    def test(loader):
        # Evaluate; prints per-descriptor R2/MAE in the original units
        # (predictions are un-normalized with dscrptr_mean/std).
        model.eval()
        mae_array = np.zeros(shape=(len(target_list)))
        trgt_array = np.zeros(shape=(0, len(target_list)))
        pred_array = np.zeros(shape=(0, len(target_list)))
        with torch.no_grad():
            for data in loader:
                data = data.to(device)
                pred = model(data)
                trgt = data.y.cpu().numpy().reshape(-1, len(target_list))
                pred = pred.detach().cpu().numpy().reshape(
                    -1, len(target_list))
                trgt = trgt * dscrptr_std + dscrptr_mean
                pred = pred * dscrptr_std + dscrptr_mean
                trgt_array = np.vstack((trgt_array, trgt))
                pred_array = np.vstack((pred_array, pred))
                mae_array += np.sum(np.abs(trgt - pred), axis=0)
        mae_array = mae_array / len(loader.dataset)

        r2_array = np.array([
            r2_score(y_pred=pred_array[:, i], y_true=trgt_array[:, i])
            for i, t in enumerate(target_list)
        ])
        for i, target in enumerate(target_list):
            print(f'Target Descriptor Name: {target:15s}, '
                  f'R2: {r2_array[i]:.4f}, MAE: {mae_array[i]:.4f}')
        return np.mean(r2_array), np.mean(mae_array)

    print('Training started.')
    best_val_r2 = None
    for epoch in range(1, args.max_num_epochs + 1):
        lr = scheduler.optimizer.param_groups[0]['lr']
        loss = train(trn_loader)
        print('Validation ' + '#' * 80)
        val_r2, val_mae = test(val_loader)
        print('#' * 80)
        scheduler.step(val_r2)

        # Only run the (expensive) test evaluation on validation improvement;
        # tst_r2/tst_mae retain the values from the last improvement.
        if best_val_r2 is None or val_r2 > best_val_r2:
            best_val_r2 = val_r2
            print('Testing ' + '#' * 80)
            tst_r2, tst_mae = test(tst_loader)
            print('#' * 80)

        print(f'Epoch: {epoch:03d}, LR: {lr:6f}, Loss: {loss:.4f}, ',
              f'Validation R2: {val_r2:.4f} MAE: {val_mae:.4f}; ',
              f'Testing R2: {tst_r2:.4f} MAE: {tst_mae:.4f};')
def main():
    """Entry point: train or evaluate a dynamic GNN on genomics survival data.

    Tasks:
        train     - fit DeepDynGNN and save model/history.
        tradition - run traditional survival analysis baselines.
        eval      - evaluate a loaded model on the full dataset.
    """
    # load config
    config = Config()
    opts = config.initialize()
    config.save(os.path.join(opts.to, "config.json"))
    print(config)

    # prepare dataset: the split kwargs are identical for every split.
    pre_transform = processes_dict[opts.processed_name]
    split_kwargs = {
        'val_prop': opts.split[1],
        'test_prop': opts.split[2],
        'random_seed': opts.random_seed,
    }
    datasets = {}
    if opts.task in ["train", "tradition"]:
        datasets["train"] = GenomicsData(opts.root_name, opts.source_files,
                                         pre_transform, "train",
                                         opts.processed_name, **split_kwargs)
        if opts.split[2] > 0.0:
            datasets["test"] = GenomicsData(opts.root_name, opts.source_files,
                                            pre_transform, "test",
                                            opts.processed_name,
                                            **split_kwargs)
        if opts.split[1] > 0.0:
            datasets["val"] = GenomicsData(opts.root_name, opts.source_files,
                                           pre_transform, "val",
                                           opts.processed_name,
                                           **split_kwargs)
    else:
        datasets["eval"] = GenomicsData(opts.root_name, opts.source_files,
                                        pre_transform, "train",
                                        opts.processed_name, **split_kwargs)

    # FIX: the original read datasets["train"] unconditionally, which raised
    # KeyError for the eval task (only datasets["eval"] exists there).
    ref_dataset = datasets["train"] if "train" in datasets else datasets["eval"]
    in_dim = ref_dataset.features_dim
    num_nodes = ref_dataset.num_nodes

    dataloaders = {
        k: pyg_data.DataLoader(dat,
                               batch_size=opts.batch_size,
                               shuffle=(k == "train"))
        for k, dat in datasets.items()
    }

    # tranditional evaulation:
    if opts.task == "tradition":
        train_scores, test_scores = traditional_surv_analysis(datasets, opts)
        save_generally([train_scores, test_scores],
                       os.path.join(opts.to, "tradition.json"))
        print("")
        print("train:")
        print(train_scores)
        print("test:")
        print(test_scores)
        return

    # networks
    if opts.load_model is not None:
        # NOTE: torch.load unpickles arbitrary objects; only load
        # checkpoints from trusted sources.
        model = torch.load(opts.load_model)
    else:
        model = DeepDynGNN(in_dim, num_nodes, opts)

    # criterion (svm_loss takes an extra margin parameter)
    kwargs = {}
    if opts.criterion.lower() == "svm_loss":
        kwargs["r"] = opts.svm_loss_r
    criterion = losses_dict[opts.criterion.lower()](**kwargs)

    # scores
    scores = {s: scores_dict[s]() for s in opts.scores}

    if opts.task == "train":
        opt_name = opts.optimizer.lower()
        if opt_name == "adam":
            optimizer = optim.Adam(model.parameters(), opts.learning_rate)
        elif opt_name == "adamw":
            optimizer = optim.AdamW(model.parameters(), opts.learning_rate)
        elif opt_name == "adammax":
            # FIX: original referenced opts.leanring_rate (typo), which
            # raised AttributeError whenever this branch was taken.
            optimizer = optim.Adamax(model.parameters(), opts.learning_rate)
        elif opt_name == "rms":
            optimizer = optim.RMSprop(model.parameters(), opts.learning_rate)
        elif opt_name == "momentum":
            optimizer = optim.SGD(model.parameters(), opts.learning_rate,
                                  momentum=0.9)
        else:
            raise NotImplementedError("%s is not implemented." %
                                      opts.optimizer)

        # train model
        model, hist = train_val_test(model, criterion, optimizer, dataloaders,
                                     scores, opts)
        # save model
        save_generally(model, os.path.join(opts.to, "model.pth"))
        save_generally(hist, os.path.join(opts.to, "hist.json"))
    elif opts.task == "eval":
        assert opts.load_model is not None
        # predict model
        eval_loss, eval_scores, pred, target = eval_one_epoch(
            model, criterion, dataloaders["eval"], scores, opts.device)
        print("eval loss is : %.4f" % eval_loss)
        for k, v in eval_scores.items():
            print("eval %s is : %.4f" % (k, v))
        # save pred
        eval_scores.update({"loss": eval_loss})
        save_generally(eval_scores, os.path.join(opts.to, "eval_res.json"))
        save_generally(pred, os.path.join(opts.to, "pred.txt"))
        save_generally(target, os.path.join(opts.to, "target.txt"))
def search(args):
    """Predictor-guided NAS over NAS-Bench-101 or NAS-Bench-201.

    Seeds a random architecture pool, trains a GNN predictor on the pool's
    measured accuracies, ranks all unseen architectures by predicted score,
    then evaluates them in rank order until the simulated time budget is
    exhausted.  Returns (best_arch, valid_acc, test_acc).
    """
    if args.benchmark == '101':
        from search.nas_101_utils import spec2data, NASBench, Architect, OPS_FULL, N_NODES, MAX_EDGES
        from nasbench import api
    else:
        import nas_201_api as nb
        from search.nas_201_utils import train_and_eval, CifarBench, Architect, arch2data
        OP_SPACE = cell.SearchSpaceNames['nas-bench-201']
        N_NODES = 4

    def initialize_pool(bench, size):
        # Draw `size` unique random architectures and pre-evaluate each one
        # on the benchmark; returns the pool and the set of seen arch strings.
        pool = []
        seen = set()
        while len(seen) < size:
            candidate = Architect().randomize_()
            struct = candidate.struct
            arch_str = bench.arch_str(struct)
            # Re-roll until we land on a valid, previously unseen arch.
            while arch_str is None or arch_str in seen:
                candidate.randomize_()
                struct = candidate.struct
                arch_str = bench.arch_str(struct)
            seen.add(arch_str)
            bench.eval_arch(arch_str)
            pool.append(candidate)
        return pool, seen

    # Full determinism: seed every RNG and pin cuDNN behavior.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    logdir = args.log_dir
    writer = SummaryWriter(log_dir=logdir)
    logger = get_logger(os.path.join(logdir, 'log'))
    logger.info('Arguments : -------------------------------')
    for name, value in args._get_kwargs():
        logger.info('{:16} : {:}'.format(name, value))

    # Benchmark-specific setup: predictor shape and unseen-arch enumerator.
    if args.benchmark == '101':
        nas_bench = api.NASBench(args.nas_bench_path)
        cifar_bench = NASBench(nas_bench, average_all=args.average_all)
        predictor = Predictor(t_edge=1,
                              t_node=len(OPS_FULL),
                              n_node=N_NODES,
                              h_dim=64,
                              n_out=1).to('cuda')

        def enum_arch_data():
            for h in cifar_bench._all_hashes:
                yield h, spec2data(cifar_bench.hash2spec(h))
    elif args.benchmark == '201':
        nas_bench = nb.NASBench201API(args.nas_bench_path)
        predictor = Predictor(len(OP_SPACE), 1, N_NODES, 64, 1).to('cuda')
        cifar_bench = CifarBench(nas_bench)

        def enum_arch_data():
            # Skip isomorphic duplicates via the canonical unique string.
            duplicated = set()
            for idx in range(len(nas_bench)):
                archstr = nas_bench[idx]
                struct = gt.Structure.str2structure(archstr)
                unique_str = struct.to_unique_str(True)
                if unique_str not in duplicated:
                    duplicated.add(unique_str)
                    yield archstr, arch2data(archstr)

    predictor_opt = torch.optim.Adam(predictor.parameters(),
                                     args.p_lr,
                                     weight_decay=args.weight_decay)
    logger.info("params size = %fM" % (count_parameters(predictor) / 1e6))
    logger.info("\n")

    logger.info("initialize arch pool")
    arch_pool, seen_arch = initialize_pool(cifar_bench, args.pool_size)
    history = [cifar_bench.arch_str(a.struct) for a in arch_pool]

    # logging initial samples
    best_arch_seen = cifar_bench.choose_best(seen_arch)
    logger.info("init pool: %d, seen arch: %d" %
                (len(arch_pool), len(seen_arch)))
    logger.info("simulated time cost: %f" % cifar_bench.total_cost)
    logger.info("best initial arch:")
    cifar_bench.log_arch(best_arch_seen, 0, 'acc_best', logger, writer)

    # Fit the predictor on the evaluated pool.
    logger.info('start training predictor')
    train_loader = gd.DataListLoader(cifar_bench.history_data(),
                                     args.train_batch_size,
                                     shuffle=True)
    for epoch in tqdm(range(args.epochs)):
        loss = predictor.fit(train_loader, predictor_opt, 0, None,
                             args.regression, args.grad_clip, 0)
        writer.add_scalar('loss_r', loss, epoch)

    # Score every architecture not already in the pool.
    logger.info('preparing valid data')
    all_arch, all_data = list(
        zip(*tqdm(filter(lambda v: v[0] not in seen_arch, enum_arch_data()))))
    scores = []
    pred_loader = gd.DataLoader(all_data, batch_size=args.step_batch_size)
    with torch.no_grad():
        for batch in tqdm(pred_loader, total=len(pred_loader)):
            batch = batch.to('cuda')
            scores.append(predictor(batch).cpu().numpy())
    scores = np.concatenate(scores, axis=0).flatten()
    # NOTE(review): argsort is ascending, so architectures with the LOWEST
    # predicted score are evaluated first — confirm this matches the
    # predictor's target semantics.
    ranked_indices = np.argsort(scores)

    # Greedily evaluate in rank order until the time budget runs out.
    while cifar_bench.total_cost < args.time_budget:
        pick = ranked_indices[0]
        logger.info("current time cost: %f" % cifar_bench.total_cost)
        logger.info("arch to eval: %s" % all_arch[pick])
        cifar_bench.eval_arch(all_arch[pick])
        history.append(all_arch[pick])
        ranked_indices = ranked_indices[1:]

    best_arch_seen = cifar_bench.choose_best(history)
    # Persist every selection with its benchmark lookup value.
    with open(os.path.join(logdir, 'selections'), 'w') as f:
        for a in history:
            f.write(a + ',' + str(cifar_bench.lookup(a)))
            f.write('\n')
    return best_arch_seen, cifar_bench.valid_acc(
        best_arch_seen), cifar_bench.test_acc(best_arch_seen)