def build_model(self) -> nn.Module:
    model = nn.Sequential(
        # First block: two 3x3 convs with 32 output channels each.
        nn.Conv2d(NUM_CHANNELS, 32, kernel_size=(3, 3)),
        nn.ReLU(),
        nn.Conv2d(32, 32, kernel_size=(3, 3)),
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        nn.Dropout2d(pedl.get_hyperparameter("layer1_dropout")),
        nn.Conv2d(32, 64, (3, 3), padding=1),
        nn.ReLU(),
        nn.Conv2d(64, 64, (3, 3)),
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        nn.Dropout2d(pedl.get_hyperparameter("layer2_dropout")),
        Flatten(),
        nn.Linear(2304, 512),
        nn.ReLU(),
        # Plain Dropout here: the activations are 1-D per example after Flatten.
        nn.Dropout(pedl.get_hyperparameter("layer3_dropout")),
        nn.Linear(512, NUM_CLASSES),
        # Softmax over the class dimension (dim=1), not the batch dimension.
        nn.Softmax(dim=1),
    )
    # If loading backbone weights, do not call reset_parameters(), or call it
    # before loading the backbone weights.
    reset_parameters(model)
    return model
def __init__(self, optimizer, last_epoch=-1):
    """
    Custom LR scheduler that adjusts the learning rate based on the
    sequence length of the current batch.
    """
    self.seq_len = pedl.get_hyperparameter("bptt")
    self.start_lr = pedl.get_hyperparameter("learning_rate")
    super(MyLR, self).__init__(optimizer, last_epoch)
def optimizer(self, model: nn.Module) -> torch.optim.Optimizer:  # type: ignore
    return torch.optim.RMSprop(  # type: ignore
        model.parameters(),
        lr=pedl.get_hyperparameter("learning_rate"),
        weight_decay=pedl.get_hyperparameter("learning_rate_decay"),
        alpha=0.9,
    )
def make_data_loaders(
    experiment_config: Dict[str, Any], hparams: Dict[str, Any]
) -> Tuple[Sequence, Sequence]:
    """
    Provides training and validation data for model training.

    This function splits previously-downloaded training and validation CSVs
    containing features and labels together into Keras Sequences wrapping
    features and labels separately.
    """
    # The first row of each data set is not a typical CSV header with column
    # labels, but rather a dataset descriptor of the following format:
    #
    # <num observations>,<num features>,<species 0 label>,<species 1 label>,<species 2 label>
    #
    # The remaining rows then contain observations, with the four features
    # followed by the label. The label values in the observation rows take on
    # the values 0, 1, or 2, which correspond to the three species in the
    # header. Define the columns explicitly here so that we can more easily
    # separate features and labels below.
    label_header = "Species"
    ds_columns = [
        "SepalLength",
        "SepalWidth",
        "PetalLength",
        "PetalWidth",
        label_header,
    ]

    # Ignore the header line and read the training and test CSV observations
    # into pandas DataFrames.
    train = pd.read_csv(experiment_config["data"]["train_url"], names=ds_columns, header=0)
    train_features, train_labels = train, train.pop(label_header)

    test = pd.read_csv(experiment_config["data"]["test_url"], names=ds_columns, header=0)
    test_features, test_labels = test, test.pop(label_header)

    # Since we're building a classifier, convert the labels in the raw dataset
    # (0, 1, or 2) to one-hot vector encodings that we'll use to construct the
    # Sequence data loaders that PEDL expects.
    train_labels_categorical = to_categorical(train_labels, num_classes=3)
    test_labels_categorical = to_categorical(test_labels, num_classes=3)

    # The training and test sets are so small that we can safely use PEDL's
    # in-memory implementation of keras.utils.Sequence, InMemorySequence.
    train = InMemorySequence(
        data=train_features,
        labels=train_labels_categorical,
        batch_size=pedl.get_hyperparameter("batch_size"),
    )
    test = InMemorySequence(
        data=test_features,
        labels=test_labels_categorical,
        batch_size=pedl.get_hyperparameter("batch_size"),
    )
    return train, test
def optimizer(self, model: nn.Module):
    """
    Required method. Sets the optimizer to use.

    Returns:
        optimizer
    """
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=pedl.get_hyperparameter("learning_rate"),
        weight_decay=pedl.get_hyperparameter("wdecay"),
    )
    return optimizer
def _calculate_seq_len(self, i):
    # 95% of the time sample around the full bptt length; otherwise around
    # half of it.
    bptt = (
        pedl.get_hyperparameter("bptt")
        if np.random.random() < 0.95
        else pedl.get_hyperparameter("bptt") / 2.0
    )
    seq_len = max(5, int(np.random.normal(bptt, 5)))
    seq_len = min(
        seq_len,
        pedl.get_hyperparameter("bptt") + pedl.get_hyperparameter("max_seq_length_delta"),
    )
    # During validation use the fixed bptt length, and never run past the
    # end of the data.
    seq_len = min(
        pedl.get_hyperparameter("bptt") if self.valid else seq_len,
        self.data_length - 1 - i,
    )
    return seq_len
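# The clamped-normal sampling above is easiest to understand in isolation. A
# self-contained sketch of the same scheme follows; it is illustrative only,
# and the values bptt=70 and max_delta=20 are assumptions, not values taken
# from this codebase.
import numpy as np

def demo_seq_len(bptt=70, max_delta=20, remaining=10000):
    # 95% of draws target the full bptt window, 5% a half-length window.
    base = bptt if np.random.random() < 0.95 else bptt / 2.0
    # Sample around that target, but never shorter than 5 steps...
    seq_len = max(5, int(np.random.normal(base, 5)))
    # ...never longer than bptt + max_delta, and never past the data.
    return min(seq_len, bptt + max_delta, remaining)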
def create_lr_scheduler(self, optimizer: torch.optim.Optimizer):
    """
    Required method to use a learning rate scheduler.

    Returns:
        PEDL LRScheduler object

    PEDL handles the learning rate scheduler update based on the PEDL
    LRScheduler parameters. If step_every_batch or step_every_epoch is True,
    PEDL will call .step() itself; if both are False, the user is in charge
    of calling .step().
    """
    self.myLR = MyLR(optimizer)
    step_every_batch = pedl.get_hyperparameter("step_every_batch")
    step_every_epoch = pedl.get_hyperparameter("step_every_epoch")
    return LRScheduler(
        self.myLR, step_every_batch=step_every_batch, step_every_epoch=step_every_epoch
    )
def get_lr(self):
    # Return the current rates, then rescale the base rates in proportion to
    # the current sequence length so the next step uses the adjusted values.
    ret = list(self.base_lrs)
    self.base_lrs = [
        self.start_lr * self.seq_len / pedl.get_hyperparameter("bptt")
        for base_lr in self.base_lrs
    ]
    return ret
def train_batch(self, batch: TorchData, model: nn.Module, epoch_idx: int, batch_idx: int):
    """
    Trains the provided batch.

    Returns:
        dictionary of the calculated metrics
    """
    features, labels = batch
    self.update_and_step_lr(features.shape[0])

    # Set the hidden state if this is the first batch.
    if batch_idx == 0:
        self.hidden = model.init_hidden(pedl.get_hyperparameter("batch_size"))

    # Detach to prevent backpropagating too far.
    for i in range(len(self.hidden)):
        self.hidden[i] = self.hidden[i].detach()

    log_prob, self.hidden, rnn_hs, dropped_rnn_hs = model(features, self.hidden, return_h=True)
    loss = nn.functional.nll_loss(
        log_prob.view(-1, log_prob.size(2)), labels.contiguous().view(-1)
    )

    # Activation regularization (AR): penalize large activations.
    if pedl.get_hyperparameter("alpha") > 0:
        loss = loss + sum(
            pedl.get_hyperparameter("alpha") * dropped_rnn_h.pow(2).mean()
            for dropped_rnn_h in dropped_rnn_hs[-1:]
        )
    # Temporal activation regularization (TAR): penalize large changes
    # between consecutive hidden states.
    loss = (
        loss
        + sum(
            pedl.get_hyperparameter("beta") * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean()
            for rnn_h in rnn_hs[-1:]
        )
    ) * 1.0

    try:
        perplexity = math.exp(loss / len(features))
    except Exception as e:
        logging.error("Calculating perplexity failed with error: %s", e)
        perplexity = 100000
    if math.isnan(perplexity):
        perplexity = 100000

    return {"loss": loss, "perplexity": perplexity}
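# update_and_step_lr() is invoked above but is not shown in this snippet.
# Given MyLR's get_lr(), it presumably records the current batch's sequence
# length on the scheduler and steps it; the following is a hedged sketch of
# that behavior, not the original implementation.
def update_and_step_lr(self, seq_len):
    # Hypothetical helper: scale the LR to the sequence length just consumed.
    self.myLR.seq_len = seq_len
    self.myLR.step()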
def cnn_model():
    # Get hyperparameters for this trial.
    kernel_size = pedl.get_hyperparameter("kernel_size")
    dropout = pedl.get_hyperparameter("dropout")
    activation = pedl.get_hyperparameter("activation")

    model = Sequential()
    model.add(
        Conv2D(
            32,
            kernel_size=(kernel_size, kernel_size),
            activation="relu",
            input_shape=INPUT_SHAPE,
        )
    )
    model.add(Conv2D(64, (3, 3), activation=activation))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(dropout))
    model.add(Flatten())
    model.add(Dense(128, activation=activation))
    model.add(Dropout(0.5))
    model.add(Dense(NUM_CLASSES, activation="softmax"))
    return model
def build_model(self, hparams: Dict[str, Any]) -> Model:
    """
    Define the model for iris classification.

    This is a simple model with one hidden layer that predicts the iris
    species (setosa, versicolor, or virginica) from four input features
    (length and width of sepals and petals).
    """
    inputs = Input(shape=(4,))
    dense1 = Dense(pedl.get_hyperparameter("layer1_dense_size"))(inputs)
    dense2 = Dense(NUM_CLASSES, activation="softmax")(dense1)
    model = Model(inputs=inputs, outputs=dense2)
    model.compile(
        RMSprop(
            lr=pedl.get_hyperparameter("learning_rate"),
            decay=pedl.get_hyperparameter("learning_rate_decay"),
        ),
        categorical_crossentropy,
        [categorical_accuracy],
    )
    return model
def __iter__(self):
    i = 0
    while i < self.data_length:
        seq_len = self._calculate_seq_len(i)
        start = i
        end = i + seq_len
        # A seq_len of 0 means we have reached the end of the data.
        if seq_len == 0:
            break
        # Yield the indices for this sequence window.
        yield list(range(start, end + 1))
        i += seq_len
def evaluate_full_dataset(self, data_loader: torch.utils.data.DataLoader, model: nn.Module):
    """
    Determines whether multiple architectures should be evaluated and
    dispatches to the appropriate path.

    Returns:
        the results of the evaluated dataset, or the best result from
        multiple evaluations
    """
    eval_same_arch = pedl.get_hyperparameter("eval_same_arch")
    if eval_same_arch:
        # Evaluate the same architecture every time.
        res = self.evaluate_dataset(data_loader, model, self.arch)
    else:
        res = self.evaluate_multiple_archs(data_loader, model)
    return res
def make_data_loaders(experiment_config: Dict[str, Any], hparams: Dict[str, Any]):
    """
    Required method to load in the datasets.

    Returns:
        PEDL DataLoaders
    """
    corpus = data.Corpus(pedl.get_data_config().get("data_loc"))
    train_dataset = PTBData(
        corpus.train,
        pedl.get_hyperparameter("seq_len"),
        pedl.get_hyperparameter("batch_size"),
    )
    test_dataset = PTBData(
        corpus.valid,
        pedl.get_hyperparameter("seq_len"),
        pedl.get_hyperparameter("eval_batch_size"),
    )
    return (
        DataLoader(
            train_dataset,
            batch_sampler=BatchSamp(train_dataset),
            collate_fn=PadSequence(),
        ),
        DataLoader(
            test_dataset,
            batch_sampler=BatchSamp(test_dataset, valid=True),
            collate_fn=PadSequence(),
        ),
    )
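# BatchSamp yields a list of dataset indices per batch (see its __iter__), so
# it is wired in as batch_sampler: the DataLoader fetches dataset[i] for each
# index in a yielded list and hands the examples to the collate_fn. The
# PadSequence class itself is not shown in this snippet; below is a minimal
# sketch of a pad-and-collate callable in that spirit. It is an assumption,
# not the original implementation, and presumes each example is a
# (features, labels) pair of variable-length 1-D tensors.
import torch

class PadSequence:
    def __call__(self, batch):
        features, labels = zip(*batch)
        # Pad variable-length sequences up to the longest in the batch.
        padded_features = torch.nn.utils.rnn.pad_sequence(features)
        padded_labels = torch.nn.utils.rnn.pad_sequence(labels)
        return padded_features, padded_labels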
def build_model(self) -> nn.Module:
    model = nn.Sequential(
        nn.Conv2d(1, pedl.get_hyperparameter("n_filters1"), kernel_size=5),
        nn.MaxPool2d(2),
        nn.ReLU(),
        nn.Conv2d(
            pedl.get_hyperparameter("n_filters1"),
            pedl.get_hyperparameter("n_filters2"),
            kernel_size=5,
        ),
        nn.MaxPool2d(2),
        nn.ReLU(),
        Flatten(),
        nn.Linear(16 * pedl.get_hyperparameter("n_filters2"), 50),
        nn.ReLU(),
        # Plain Dropout here: the activations are 1-D per example after Flatten.
        nn.Dropout(pedl.get_hyperparameter("dropout")),
        nn.Linear(50, 10),
        # LogSoftmax over the class dimension.
        nn.LogSoftmax(dim=1),
    )
    # If loading backbone weights, do not call reset_parameters(), or call it
    # before loading the backbone weights.
    reset_parameters(model)
    return model
def __init__(self) -> None:
    super().__init__()

    # Set hyperparameters that influence the model architecture.
    self.n_filters1 = pedl.get_hyperparameter("n_filters1")
    self.n_filters2 = pedl.get_hyperparameter("n_filters2")
    self.dropout = pedl.get_hyperparameter("dropout")

    # Define the central model.
    self.model = nn.Sequential(
        nn.Conv2d(1, self.n_filters1, kernel_size=5),
        nn.MaxPool2d(2),
        nn.ReLU(),
        nn.Conv2d(self.n_filters1, self.n_filters2, kernel_size=5),
        nn.MaxPool2d(2),
        nn.ReLU(),
        Flatten(),
        nn.Linear(16 * self.n_filters2, 50),
        nn.ReLU(),
        # Plain Dropout here: the activations are 1-D per example after Flatten.
        nn.Dropout(self.dropout),
    )  # type: nn.Sequential

    # Predict digit labels from self.model; softmax over the class dimension.
    self.digit = nn.Sequential(nn.Linear(50, 10), nn.Softmax(dim=1))

    # Predict binary labels from self.model.
    self.binary = nn.Sequential(nn.Linear(50, 1), nn.Sigmoid(), Squeeze())
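# forward() is not part of this snippet. A minimal sketch of how the shared
# trunk would typically feed both heads (an illustrative assumption, not the
# original code):
def forward(self, x):
    features = self.model(x)
    # Return both task outputs: digit class probabilities and the binary label.
    return self.digit(features), self.binary(features)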
def __init__(self, hparams):
    super().__init__(hparams)
    self.kernel_size = pedl.get_hyperparameter("kernel_size")
    self.dropout = pedl.get_hyperparameter("dropout")
    self.pool_size = pedl.get_hyperparameter("pool_size")
    self.l2_reg = pedl.get_hyperparameter("l2_reg")
    self.lr = pedl.get_hyperparameter("lr")
    self.my_batch_size = pedl.get_hyperparameter("batch_size")
    self.data_info = load_organized_data_info(IMGS_DIM_1D)
def make_data_loaders(experiment_config, hparams):
    fashion_mnist = keras.datasets.fashion_mnist
    (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
    # Scale pixel values from [0, 255] to [0, 1].
    train_images, test_images = train_images / 255.0, test_images / 255.0

    batch_size = pedl.get_hyperparameter("batch_size")
    train = data.InMemorySequence(data=train_images, labels=train_labels, batch_size=batch_size)
    test = data.InMemorySequence(data=test_images, labels=test_labels, batch_size=batch_size)
    return train, test
def evaluate_dataset(self, data_loader, model, arch, split=None):
    """
    Evaluates the full dataset against the given arch.
    """
    hidden = model.init_hidden(pedl.get_hyperparameter("eval_batch_size"))
    model = self.set_model_arch(arch, model)
    total_loss = 0
    num_samples_seen = 0
    for batch in data_loader:
        features, targets = batch
        features, targets = features.cuda(), targets.cuda()
        log_prob, hidden = model(features, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
        total_loss += loss * len(features)
        # Detach the hidden state so it does not keep the graph alive.
        for i in range(len(hidden)):
            hidden[i] = hidden[i].detach()
        num_samples_seen += features.shape[0]

    try:
        perplexity = math.exp(total_loss.item() / num_samples_seen)
    except Exception as e:
        logging.error("Calculating perplexity failed with error: %s", e)
        perplexity = 100000
    if math.isnan(perplexity):
        perplexity = 100000
    if math.isnan(loss):
        loss = 100000
    return {"loss": total_loss, "perplexity": perplexity}
def evaluate_multiple_archs(self, data_loader, model):
    """
    Helper that randomly selects architectures and evaluates their performance.

    This function is only called if eval_same_arch is False and should not be
    used for the primary NAS search.
    """
    num_archs_to_eval = pedl.get_hyperparameter("num_archs_to_eval")
    sample_vals = []
    for _ in range(num_archs_to_eval):
        arch = self.sample_arch()
        res = self.evaluate_dataset(data_loader, model, arch)
        perplexity = res["perplexity"]
        loss = res["loss"]
        sample_vals.append((arch, perplexity, loss))
    # Sort by perplexity, ascending, so the best architecture comes first.
    sample_vals = sorted(sample_vals, key=lambda x: x[1])
    logging.info("best arch found: %s", sample_vals[0])
    self.save_archs(sample_vals)
    return {"loss": sample_vals[0][2], "perplexity": sample_vals[0][1]}
def optimizer(self, model: nn.Module) -> torch.optim.Optimizer:  # type: ignore
    return torch.optim.SGD(
        model.parameters(),
        lr=pedl.get_hyperparameter("learning_rate"),
        momentum=0.9,
    )
def __init__(self, data, seq_len, batch_size, valid=False):
    self.batch_size = batch_size
    self.data = self.batchify(data)
    self.max_seq_len = pedl.get_hyperparameter("bptt") + pedl.get_hyperparameter(
        "max_seq_length_delta"
    )
    self.valid = valid
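# batchify() is referenced above but not shown in this snippet. In the
# AWD-LSTM-style codebases this example follows, it typically trims the token
# stream to a multiple of batch_size and reshapes it into batch_size parallel
# columns; the following is a sketch under that assumption, not the original
# implementation.
def batchify(self, data):
    # Work out how many full columns of batch_size tokens the data divides into.
    nbatch = data.size(0) // self.batch_size
    # Trim off any trailing tokens that would not fill a full column.
    data = data.narrow(0, 0, nbatch * self.batch_size)
    # Reshape to (num_steps, batch_size): each column is a contiguous stream.
    return data.view(self.batch_size, -1).t().contiguous()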
MODELS_DIR = join(dirname(dirname(__file__)), 'models')
MISC_DIR = join(dirname(dirname(__file__)), 'misc')

IMGS_DIM_3D = (3, 256, 256)
CNN_MODEL_FILE = join(MODELS_DIR, 'cnn.h5')
MAX_EPOCHS = 500
W_INIT = 'he_normal'
LAST_FEATURE_MAPS_LAYER = 46
LAST_FEATURE_MAPS_SIZE = (128, 8, 8)
PENULTIMATE_LAYER = 51
PENULTIMATE_SIZE = 2048
SOFTMAX_LAYER = 55
NUM_CLASSES = SOFTMAX_SIZE = 1584

# Hyperparameters
kernel_size = pedl.get_hyperparameter("kernel_size")
dropout = pedl.get_hyperparameter("dropout")
pool_size = pedl.get_hyperparameter("pool_size")
L2_REG = pedl.get_hyperparameter("l2_reg")
lr = pedl.get_hyperparameter("lr")
BATCH_SIZE = pedl.get_hyperparameter("batch_size")

IMGS_DIM_1D = 256
MODEL_NAME = 'cnn_2_9069_vl.h5'
LAYER_SIZES = {
    'feature_maps': LAST_FEATURE_MAPS_SIZE,
    'penultimate': PENULTIMATE_SIZE,
    'softmax': SOFTMAX_SIZE,
}
def build_model(self) -> nn.Module:
    """
    Required method that builds the model.

    Returns:
        PyTorch model
    """
    arch_to_use = pedl.get_hyperparameter("arch_to_use")

    if hasattr(genotypes, arch_to_use):
        self.arch = getattr(genotypes, arch_to_use)
        logging.info("using genotype.{0}".format(self.arch))
    else:
        self.arch = self.sample_arch()
        logging.info("using random arch.{0}".format(self.arch))

    model = RNNModel(
        PTB_NUMBER_TOKENS,
        pedl.get_hyperparameter("emsize"),
        pedl.get_hyperparameter("nhid"),
        pedl.get_hyperparameter("nhidlast"),
        pedl.get_hyperparameter("dropout"),
        pedl.get_hyperparameter("dropouth"),
        pedl.get_hyperparameter("dropoutx"),
        pedl.get_hyperparameter("dropouti"),
        pedl.get_hyperparameter("dropoute"),
        genotype=self.arch,
    )

    # Stack multiple cells if requested. By default the depth is 1, in which
    # case this loop does not run; the count is depth - 1 because one cell is
    # added automatically by the main model.
    for _ in range(pedl.get_hyperparameter("depth") - 1):
        new_cell = model.cell_cls(
            pedl.get_hyperparameter("emsize"),
            pedl.get_hyperparameter("nhid"),
            pedl.get_hyperparameter("dropouth"),
            pedl.get_hyperparameter("dropoutx"),
            self.arch,
            pedl.get_hyperparameter("init_op"),
        )
        model.rnns.append(new_cell)

    model.batch_size = pedl.get_hyperparameter("batch_size")
    return model
def save_archs(self, data):
    # Append the trial's seed to the output file name so that runs with
    # different seeds do not overwrite each other.
    out_file = pedl.get_data_config().get("out_file") + str(
        pedl.get_hyperparameter("seed")
    )
    with open(out_file, "wb+") as f:
        pkl.dump(data, f)