def generate_synthetic_data(self, model, num_synthetic):
    synthetic_input = []
    synthetic_target = []
    # convert the architectures in self.unlabeled to the right encoding
    for i in range(num_synthetic):
        arch = self.unlabeled[i]
        encoded = encode(arch, encoding_type=self.encoding_type,
                         ss_type=self.ss_type)
        seq = convert_arch_to_seq(encoded['adjacency'],
                                  encoded['operations'],
                                  max_n=self.max_n)
        synthetic_input.append(seq)

    # use the model to label the synthetic data
    synthetic_dataset = ControllerDataset(synthetic_input, None, False)
    synthetic_queue = torch.utils.data.DataLoader(
        synthetic_dataset,
        batch_size=len(synthetic_dataset),
        shuffle=False,
        pin_memory=True,
        drop_last=False)

    with torch.no_grad():
        model.eval()
        for sample in synthetic_queue:
            # use_cuda is a module-level flag
            if use_cuda:
                encoder_input = move_to_cuda(sample['encoder_input'])
            else:
                encoder_input = sample['encoder_input']
            _, _, _, predict_value = model.encoder(encoder_input)
            synthetic_target += predict_value.data.squeeze().tolist()

    assert len(synthetic_input) == len(synthetic_target)
    return synthetic_input, synthetic_target

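# `move_to_cuda` is used above (and in query below) but not defined in this
# file. A minimal sketch of the assumed helper: recursively move tensors,
# possibly nested in the dicts/lists produced by a DataLoader collate, onto
# the GPU. The name exists in the codebase, but this body is an assumption,
# not the library's confirmed implementation.
import torch

def move_to_cuda(obj):
    # tensors go straight to the default CUDA device
    if torch.is_tensor(obj):
        return obj.cuda()
    # recurse into common container types produced by a DataLoader collate
    if isinstance(obj, dict):
        return {k: move_to_cuda(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return type(obj)(move_to_cuda(v) for v in obj)
    # anything else (ints, strings, ...) is returned unchanged
    return obj
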
def query(self, xtest, info=None, eval_batch_size=100):
    test_seq_pool = []
    for i, arch in enumerate(xtest):
        encoded = encode(arch, encoding_type=self.encoding_type,
                         ss_type=self.ss_type)
        seq = convert_arch_to_seq(encoded['adjacency'],
                                  encoded['operations'],
                                  max_n=self.max_n)
        test_seq_pool.append(seq)

    test_dataset = ControllerDataset(test_seq_pool, None, False)
    # use eval_batch_size here; the original referenced an undefined
    # `batch_size`, which raises a NameError
    test_queue = torch.utils.data.DataLoader(test_dataset,
                                             batch_size=eval_batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             drop_last=False)

    self.model.eval()
    pred = []
    with torch.no_grad():
        for _, sample in enumerate(test_queue):
            encoder_input = move_to_cuda(sample['encoder_input'])
            decoder_target = move_to_cuda(sample['decoder_target'])
            prediction, _, _ = self.model(encoder_input, decoder_target)
            pred.append(prediction.cpu().numpy())

    pred = np.concatenate(pred)
    # undo the normalization applied to the targets in fit()
    return np.squeeze(pred * self.std + self.mean)

def fit(self, xtrain, ytrain, train_info=None, params=None, **kwargs):
    # normalize accuracies
    self.mean = np.mean(ytrain)
    self.std = np.std(ytrain)

    if type(xtrain) is list:
        # when called directly, xtrain is a list of architectures that
        # still needs to be encoded
        xtrain = np.array([encode(arch, encoding_type=self.encoding_type,
                                  ss_type=self.ss_type) for arch in xtrain])
        if self.zc:
            # append the (normalized) zero-cost score as an extra feature
            mean, std = -10000000.0, 150000000.0
            xtrain = [[*x, (train_info[i] - mean) / std]
                      for i, x in enumerate(xtrain)]
            xtrain = np.array(xtrain)
        ytrain = np.array(ytrain)
    # otherwise (e.g. when used in aug_lcsvr) xtrain and ytrain are
    # already ndarrays and can be used directly

    # convert to the right representation
    train_data = self.get_dataset(xtrain, ytrain)

    # fit to the training data
    self.model = self.train(train_data)

    # predict
    train_pred = np.squeeze(self.predict(xtrain))
    train_error = np.mean(abs(train_pred - ytrain))
    return train_error

def fit(self, xtrain, ytrain, train_info=None, params=None, **kwargs):
    # normalize accuracies
    self.mean = np.mean(ytrain)
    self.std = np.std(ytrain)

    xtrain = np.array([encode(arch, encoding_type=self.encoding_type,
                              ss_type=self.ss_type) for arch in xtrain])
    if self.zc:
        # append the (normalized) zero-cost score as an extra feature
        mean, std = -10000000.0, 150000000.0
        xtrain = [[*x, (train_info[i] - mean) / std]
                  for i, x in enumerate(xtrain)]
        xtrain = np.array(xtrain)
    ytrain = np.array(ytrain)

    # convert to the right representation
    train_data = self.get_dataset(xtrain, ytrain)

    # instantiate model and fit to the training data
    self.model = self.get_model(train_data, **kwargs)
    self.train(train_data, **kwargs)
    print('Finished fitting GP')

    if self.optimize_gp_hyper:
        losses = self.optimize_GP_hyperparameters(self.model)
        print('Finished tuning GP hyperparameters')

    # predict
    train_pred = np.squeeze(self.predict(train_data[0]))
    train_error = np.mean(abs(train_pred - ytrain))
    return train_error

def query(self, xtest, info=None):
    # the redundant second np.array() call has been removed
    xtest = np.array([
        encode(arch, encoding_type=self.encoding_type, ss_type=self.ss_type)
        for arch in xtest
    ])
    return self.model.predict(xtest)

def fit(self, xtrain, ytrain, train_info=None, gcn_hidden=144, seed=0,
        batch_size=7, epochs=300, lr=1e-4, wd=3e-4):
    # get mean and std, normalize accuracies
    self.mean = np.mean(ytrain)
    self.std = np.std(ytrain)
    ytrain_normed = (ytrain - self.mean) / self.std

    # encode data in gcn format
    train_data = []
    for i, arch in enumerate(xtrain):
        encoded = encode(arch, encoding_type=self.encoding_type,
                         ss_type=self.ss_type)
        encoded['val_acc'] = float(ytrain_normed[i])
        train_data.append(encoded)
    train_data = np.array(train_data)

    self.model = self.get_model(gcn_hidden=gcn_hidden)
    data_loader = DataLoader(train_data, batch_size=batch_size,
                             shuffle=True, drop_last=True)

    self.model.to(device)
    criterion = nn.MSELoss().to(device)
    optimizer = optim.Adam(self.model.parameters(), lr=lr, weight_decay=wd)
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)

    self.model.train()
    for _ in range(epochs):
        meters = AverageMeterGroup()
        lr = optimizer.param_groups[0]["lr"]
        for _, batch in enumerate(data_loader):
            target = batch["val_acc"].float().to(device)
            prediction = self.model(batch)
            loss = criterion(prediction, target)
            # clear gradients from the previous step before backprop;
            # without this, gradients accumulate across batches
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            mse = accuracy_mse(prediction, target)
            meters.update({"loss": loss.item(), "mse": mse.item()},
                          n=target.size(0))
        lr_scheduler.step()

    train_pred = np.squeeze(self.query(xtrain))
    train_error = np.mean(abs(train_pred - ytrain))
    return train_error

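# `accuracy_mse` is used in the GCN training loop above (and in similar
# loops below) but not defined in this file. A minimal sketch of the
# assumed helper: mean squared error between prediction and target,
# detached so it can be logged without affecting gradients. The library's
# version may apply additional scaling; this body is an assumption.
import torch

def accuracy_mse(prediction: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    # detach from the graph: this metric is for logging only
    return (prediction.detach() - target).pow(2).mean()
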
def query(self, xtest, info=None):
    test_data = np.array([
        encode(arch, encoding_type=self.encoding_type, ss_type=self.ss_type)
        for arch in xtest
    ])
    # the model returns a predictive mean and variance; only the mean is used
    m, v = self.model.predict(test_data)
    return np.squeeze(m)

def query(self, xtest, info=None):
    xtest = np.array([encode(arch, encoding_type=self.encoding_type,
                             ss_type=self.ss_type) for arch in xtest])
    if self.zc:
        # append the (normalized) zero-cost score as an extra feature
        mean, std = -10000000.0, 150000000.0
        xtest = [[*x, (info[i] - mean) / std] for i, x in enumerate(xtest)]
        xtest = np.array(xtest)

    test_data = self.get_dataset(xtest)
    # undo the normalization applied to the targets in fit()
    return np.squeeze(self.predict(test_data)) * self.std + self.mean

def pre_compute(self, xtrain, xtest):
    self.xtrain_zc_infos = {}
    self.xtest_zc_infos = {}

    if self.include_zero_cost:
        # compute zero-cost scores for all train and test data
        from naslib.predictors.zerocost_estimators import ZeroCostEstimators
        for method_name in self.zero_cost_methods:
            print(f'pre-compute {method_name} scores for all train and test data')
            zc_method = ZeroCostEstimators(self.config, batch_size=64,
                                           method_type=method_name)
            zc_method.train_loader = copy.deepcopy(self.train_loader)
            xtrain_zc_scores = zc_method.query(xtrain)
            xtest_zc_scores = zc_method.query(xtest)
            self.xtrain_zc_infos[f'{method_name}_scores'] = list(xtrain_zc_scores)
            self.xtest_zc_infos[f'{method_name}_scores'] = list(xtest_zc_scores)

    if self.include_arch_encoding:
        # also pre-compute the architecture encodings
        from naslib.predictors.utils.encodings import encode
        for encoding_name in self.arch_encodings:
            train_arch_encoding = [
                encode(arch, encoding_type=encoding_name,
                       ss_type=self.config.search_space) for arch in xtrain
            ]
            test_arch_encoding = [
                encode(arch, encoding_type=encoding_name,
                       ss_type=self.config.search_space) for arch in xtest
            ]
            self.xtrain_zc_infos[f'{encoding_name}'] = train_arch_encoding
            self.xtest_zc_infos[f'{encoding_name}'] = test_arch_encoding

def fit(self, xtrain, ytrain, train_info=None, epochs=100, wd=0):
    if self.hyperparams is None:
        self.hyperparams = self.default_hyperparams.copy()
    batch_size = self.hyperparams['batch_size']
    gcn_hidden = self.hyperparams['gcn_hidden']
    lr = self.hyperparams['lr']

    # get mean and std, normalize accuracies
    self.mean = np.mean(ytrain)
    self.std = np.std(ytrain)
    ytrain_normed = (ytrain - self.mean) / self.std

    # encode data in gcn format
    train_data = []
    for i, arch in enumerate(xtrain):
        encoded = encode(arch, encoding_type=self.encoding_type,
                         ss_type=self.ss_type)
        encoded['val_acc'] = float(ytrain_normed[i])
        train_data.append(encoded)
    train_data = np.array(train_data)
    nfeat = len(train_data[0]['operations'][0])

    self.model = self.get_model(gcn_hidden=gcn_hidden, nfeat=nfeat)
    data_loader = DataLoader(train_data, batch_size=batch_size,
                             shuffle=True, drop_last=False)

    self.model.to(device)
    criterion = nn.MSELoss().to(device)
    optimizer = optim.Adam(self.model.parameters(), lr=lr, weight_decay=wd)
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs,
                                                        eta_min=0)

    self.model.train()
    for _ in range(epochs):
        meters = AverageMeterGroup()
        lr = optimizer.param_groups[0]["lr"]
        for _, batch in enumerate(data_loader):
            feat = batch["operations"].to(device)
            adjmat = batch["adjacency"].to(device)
            target = batch["val_acc"].float().to(device)
            prediction = self.model(feat, adjmat)
            loss = criterion(prediction, target)
            # clear gradients from the previous step before backprop;
            # without this, gradients accumulate across batches
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            mse = accuracy_mse(prediction, target)
            meters.update({"loss": loss.item(), "mse": mse.item()},
                          n=target.size(0))
        lr_scheduler.step()

    train_pred = np.squeeze(self.query(xtrain))
    train_error = np.mean(abs(train_pred - ytrain))
    return train_error

def query(self, xtest, info=None, eval_batch_size=1000):
    test_data = np.array([encode(arch, encoding_type=self.encoding_type,
                                 ss_type=self.ss_type) for arch in xtest])
    test_data_loader = DataLoader(test_data, batch_size=eval_batch_size)

    self.model.eval()
    pred = []
    with torch.no_grad():
        for _, batch in enumerate(test_data_loader):
            prediction = self.model(batch)
            pred.append(prediction.cpu().numpy())

    pred = np.concatenate(pred)
    # undo the normalization applied to the targets in fit()
    return pred * self.std + self.mean

def fit(self, xtrain, ytrain, train_info=None, **kwargs):
    _xtrain = np.array([
        encode(arch, encoding_type=self.encoding_type, ss_type=self.ss_type)
        for arch in xtrain
    ])
    _ytrain = np.array(ytrain)

    self.model = self.get_model(**kwargs)
    self.train_model(_xtrain, _ytrain)

    train_pred = self.query(xtrain)
    train_error = np.mean(abs(train_pred - _ytrain))
    return train_error

def query(self, xtest, info=None, eval_batch_size=100):
    test_data = np.array([encode(arch, encoding_type=self.encoding_type,
                                 ss_type=self.ss_type) for arch in xtest])
    test_data_loader = DataLoader(test_data, batch_size=eval_batch_size,
                                  drop_last=False)

    self.model.eval()
    pred = []
    with torch.no_grad():
        for _, batch in enumerate(test_data_loader):
            feat, adjmat = batch["operations"], batch["adjacency"]
            prediction = self.model(feat, adjmat)
            pred.append(prediction.cpu().numpy())

    pred = np.concatenate(pred)
    # undo the normalization applied to the targets in fit()
    return pred * self.std + self.mean

def query(self, xtest, info=None):
    if type(xtest) is list:
        # when called directly, xtest is a list of architectures that
        # still needs to be encoded
        xtest = np.array([encode(arch, encoding_type=self.encoding_type,
                                 ss_type=self.ss_type) for arch in xtest])
        if self.zc:
            # append the (normalized) zero-cost score as an extra feature
            mean, std = -10000000.0, 150000000.0
            xtest = [[*x, (info[i] - mean) / std]
                     for i, x in enumerate(xtest)]
            xtest = np.array(xtest)
    # otherwise (e.g. when used in aug_lcsvr) xtest is already an ndarray

    test_data = self.get_dataset(xtest)
    # undo the normalization applied to the targets in fit()
    return np.squeeze(self.model.predict(test_data)) * self.std + self.mean

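# A hypothetical end-to-end use of the fit/query pair above (the names
# `predictor`, `archs_train`, `accs_train`, `archs_test`, and the zc_scores
# variables are illustrative, not part of the library): fit on a labeled
# set of architectures plus their zero-cost scores, then query a held-out
# set. Sketch only, assuming the predictor was constructed with zc=True and
# that the info arguments carry one zero-cost score per architecture.
#
# train_error = predictor.fit(archs_train, accs_train,
#                             train_info=zc_scores_train)
# predictions = predictor.query(archs_test, info=zc_scores_test)
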
def prepare_features(self, xdata, info, train=True):
    # prepare training data features
    full_xdata = [[] for _ in range(len(xdata))]

    if len(self.zero_cost) > 0 and self.train_size <= self.max_zerocost:
        if self.run_pre_compute:
            for key in self.xtrain_zc_info:
                if train:
                    full_xdata = [[*x, self.xtrain_zc_info[key][i]]
                                  for i, x in enumerate(full_xdata)]
                else:
                    full_xdata = [[*x, self.xtest_zc_info[key][i]]
                                  for i, x in enumerate(full_xdata)]
        else:
            # if the zero-cost scores were not precomputed, they are in info
            full_xdata = [[*x, info[i]] for i, x in enumerate(full_xdata)]

    if 'sotle' in self.lce and len(info[0]['TRAIN_LOSS_lc']) >= 3:
        # use the last train loss of each learning curve, z-normalized
        train_losses = np.array([lcs['TRAIN_LOSS_lc'][-1] for lcs in info])
        mean = np.mean(train_losses)
        std = np.std(train_losses)
        normalized = (train_losses - mean) / std
        full_xdata = [[*x, normalized[i]] for i, x in enumerate(full_xdata)]
    elif 'sotle' in self.lce and len(info[0]['TRAIN_LOSS_lc']) < 3:
        logger.info('Not enough fidelities to use train loss')

    if 'valacc' in self.lce and len(info[0]['VAL_ACCURACY_lc']) >= 3:
        # use the last validation accuracy of each learning curve,
        # z-normalized (converted to an ndarray so the vectorized
        # subtraction works; the original list would raise a TypeError)
        val_accs = np.array([lcs['VAL_ACCURACY_lc'][-1] for lcs in info])
        mean = np.mean(val_accs)
        std = np.std(val_accs)
        normalized = (val_accs - mean) / std
        full_xdata = [[*x, normalized[i]] for i, x in enumerate(full_xdata)]

    if self.encoding_type is not None:
        xdata_encoded = np.array([
            encode(arch, encoding_type=self.encoding_type,
                   ss_type=self.ss_type) for arch in xdata
        ])
        full_xdata = [[*x, *xdata_encoded[i]]
                      for i, x in enumerate(full_xdata)]

    return np.array(full_xdata)

def fit(self, xtrain, ytrain, train_info=None, num_layers=20,
        layer_width=20, loss='mae', epochs=500, batch_size=32, lr=.001,
        verbose=0, regularization=0.2):
    xtrain = np.array([
        encode(arch, encoding_type=self.encoding_type, ss_type=self.ss_type)
        for arch in xtrain
    ])
    ytrain = np.array(ytrain)

    if loss == 'mle':
        loss_fn = mle_loss
    elif loss == 'mape':
        loss_fn = mape_loss
    else:
        loss_fn = 'mae'

    self.model = self.get_model((xtrain.shape[1],),
                                loss=loss_fn,
                                num_layers=num_layers,
                                layer_width=layer_width,
                                regularization=regularization)
    optimizer = keras.optimizers.Adam(lr=lr, beta_1=.9, beta_2=.99)
    self.model.compile(optimizer=optimizer, loss=loss_fn)
    self.model.fit(xtrain, ytrain, batch_size=batch_size, epochs=epochs,
                   verbose=verbose)

    train_pred = np.squeeze(self.model.predict(xtrain))
    train_error = np.mean(abs(train_pred - ytrain))
    return train_error

def prepare_features(self, xdata, zc_info=None, lc_info=None):
    # concatenate architecture features with zero-cost and LCE features
    full_xdata = [[] for _ in range(len(xdata))]

    if self.encoding_type is not None:
        # convert the architecture to a categorical encoding
        for i, arch in enumerate(xdata):
            encoded = encode(arch, encoding_type=self.encoding_type,
                             ss_type=self.ss_type)
            seq = convert_arch_to_seq(encoded['adjacency'],
                                      encoded['operations'],
                                      max_n=self.max_n)
            full_xdata[i] = [*full_xdata[i], *seq]

    if len(self.zero_cost) > 0 and self.train_size <= self.max_zerocost:
        # add zero-cost features
        for key in self.zero_cost:
            for i in range(len(xdata)):
                # TODO: the following code is still specific to jacov;
                # generalize it to any zero-cost method. Currently only
                # one-hot zc features are supported.
                jac_encoded = discretize(zc_info['jacov_scores'][i],
                                         upper_bounds=self.jacov_bins,
                                         one_hot=self.jacov_onehot)
                jac_encoded = [jac + self.zc_offset for jac in jac_encoded]
                full_xdata[i] = [*full_xdata[i], *jac_encoded]

    if self.add_lce:
        # add LCE (learning curve extrapolation) features
        for i in range(len(xdata)):
            sotle_encoded = discretize(lc_info[i]['TRAIN_LOSS_lc'][-1],
                                       upper_bounds=self.lce_bins)
            sotle_encoded = [s + self.lce_offset for s in sotle_encoded]
            full_xdata[i] = [*full_xdata[i], *sotle_encoded]

    return full_xdata

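# `discretize` is used above but not defined in this file. A minimal sketch
# of the assumed behavior: bucket a scalar score by its bin index against a
# sorted list of upper bounds, optionally returning a one-hot vector. The
# signature matches the calls above; the exact bin semantics are an
# assumption, not the library's confirmed implementation.
def discretize(value, upper_bounds, one_hot=False):
    # index of the first bin whose upper bound exceeds the value;
    # values above all bounds fall into the last, open-ended bin
    idx = len(upper_bounds)
    for j, bound in enumerate(upper_bounds):
        if value <= bound:
            idx = j
            break
    if not one_hot:
        return [idx]
    encoded = [0] * (len(upper_bounds) + 1)
    encoded[idx] = 1
    return encoded
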
def query(self, xtest, info=None, eval_batch_size=None):
    xtest = np.array([
        encode(arch, encoding_type=self.encoding_type, ss_type=self.ss_type)
        for arch in xtest
    ])
    X_tensor = torch.FloatTensor(xtest).to(device)
    test_data = TensorDataset(X_tensor)

    # default to a single batch covering the whole test set
    eval_batch_size = len(xtest) if eval_batch_size is None else eval_batch_size
    test_data_loader = DataLoader(test_data, batch_size=eval_batch_size,
                                  pin_memory=False)

    self.model.eval()
    pred = []
    with torch.no_grad():
        for _, batch in enumerate(test_data_loader):
            prediction = self.model(batch[0].to(device)).view(-1)
            pred.append(prediction.cpu().numpy())

    pred = np.concatenate(pred)
    return np.squeeze(pred)

def fit(self, xtrain, ytrain, train_info=None, wd=0, iterations=1,
        epochs=50, pretrain_epochs=50):
    if self.hyperparams is None:
        self.hyperparams = self.default_hyperparams.copy()
    batch_size = self.hyperparams['batch_size']
    gcn_hidden = self.hyperparams['gcn_hidden']
    lr = self.hyperparams['lr']
    up_sample_ratio = 10

    # search-space-specific sequence dimensions
    if self.ss_type == 'nasbench101':
        self.max_n = 7
        encoder_length = 27
        decoder_length = 27
        vocab_size = 7
    elif self.ss_type == 'nasbench201':
        self.max_n = 8
        encoder_length = 35
        decoder_length = 35
        vocab_size = 9
    elif self.ss_type == 'darts':
        self.max_n = 35
        encoder_length = 629
        decoder_length = 629
        vocab_size = 13
    elif self.ss_type == 'nlp':
        self.max_n = 25
        encoder_length = 324
        decoder_length = 324
        vocab_size = 12

    # get mean and std, normalize accuracies
    self.mean = np.mean(ytrain)
    self.std = np.std(ytrain)
    ytrain_normed = (ytrain - self.mean) / self.std

    # encode data as sequences
    train_seq_pool = []
    train_target_pool = []
    for i, arch in enumerate(xtrain):
        encoded = encode(arch, encoding_type=self.encoding_type,
                         ss_type=self.ss_type)
        seq = convert_arch_to_seq(encoded['adjacency'],
                                  encoded['operations'],
                                  max_n=self.max_n)
        train_seq_pool.append(seq)
        train_target_pool.append(ytrain_normed[i])

    # encoder_layers, decoder_layers, mlp_layers, hidden_size,
    # mlp_hidden_size, dropout, and source_length are module-level
    # hyperparameters
    self.model = NAO(encoder_layers, decoder_layers, mlp_layers, hidden_size,
                     mlp_hidden_size, vocab_size, dropout, source_length,
                     encoder_length, decoder_length).to(device)

    for i in range(iterations):
        print('Iteration {}'.format(i + 1))
        train_encoder_input = train_seq_pool
        train_encoder_target = train_target_pool

        # Pre-train
        print('Pre-train EPD')
        train_controller(self.model, train_encoder_input,
                         train_encoder_target, pretrain_epochs)
        print('Finish pre-training EPD')

        if self.semi:
            # generate synthetic data and up-sample the labeled data so it
            # is not drowned out by the synthetic data
            num_synthetic = self.synthetic_factor * len(train_encoder_input)
            synthetic_encoder_input, synthetic_encoder_target = \
                self.generate_synthetic_data(self.model, num_synthetic)
            if up_sample_ratio is None:
                # the original referenced an undefined `m` here and the
                # deprecated np.int; use num_synthetic and the builtin int
                up_sample_ratio = int(np.ceil(num_synthetic /
                                              len(train_encoder_input)))
            all_encoder_input = (train_encoder_input * up_sample_ratio
                                 + synthetic_encoder_input)
            all_encoder_target = (train_encoder_target * up_sample_ratio
                                  + synthetic_encoder_target)
            print('Train EPD')
            train_controller(self.model, all_encoder_input,
                             all_encoder_target, epochs)
            print('Finish training EPD')

    train_pred = np.squeeze(self.query(xtrain))
    train_error = np.mean(abs(train_pred - ytrain))
    return train_error

def fit(self, xtrain, ytrain, train_info=None, gcn_hidden=64,
        batch_size=100, lr=1e-3, wd=0, iteration=1, epochs=50,
        pretrain_epochs=50, synthetic_factor=1):
    up_sample_ratio = 10

    if self.ss_type == 'nasbench101':
        self.max_n = 7
    elif self.ss_type == 'nasbench201':
        self.max_n = 8
    elif self.ss_type == 'darts':
        self.max_n = 35

    # get mean and std, normalize accuracies
    self.mean = np.mean(ytrain)
    self.std = np.std(ytrain)
    ytrain_normed = (ytrain - self.mean) / self.std

    # encode data as sequences
    train_seq_pool = []
    train_target_pool = []
    for i, arch in enumerate(xtrain):
        encoded = encode(arch, encoding_type=self.encoding_type,
                         ss_type=self.ss_type)
        seq = convert_arch_to_seq(encoded['adjacency'],
                                  encoded['operations'],
                                  max_n=self.max_n)
        train_seq_pool.append(seq)
        train_target_pool.append(ytrain_normed[i])

    # search-space-specific sequence dimensions
    if self.ss_type == 'nasbench101':
        encoder_length = 27
        decoder_length = 27
        vocab_size = 7
    elif self.ss_type == 'nasbench201':
        encoder_length = 35
        decoder_length = 35
        vocab_size = 9
    elif self.ss_type == 'darts':
        encoder_length = 629
        decoder_length = 629
        vocab_size = 13

    self.model = NAO(
        encoder_layers,
        decoder_layers,
        mlp_layers,
        hidden_size,
        mlp_hidden_size,
        vocab_size,
        dropout,
        source_length,
        encoder_length,
        decoder_length,
    ).to(device)

    for i in range(iteration):
        print('Iteration {}'.format(i + 1))
        train_encoder_input = train_seq_pool
        train_encoder_target = train_target_pool

        # Pre-train
        print('Pre-train EPD')
        train_controller(self.model, train_encoder_input,
                         train_encoder_target, pretrain_epochs)
        print('Finish pre-training EPD')

        if self.semi:
            # Generate synthetic data
            print('Generate synthetic data for EPD')
            m = synthetic_factor * len(xtrain)
            synthetic_encoder_input, synthetic_encoder_target = \
                generate_synthetic_controller_data(self.model,
                                                   train_encoder_input, m,
                                                   self.ss_type)
            if up_sample_ratio is None:
                # use the builtin int; np.int is removed in recent numpy
                up_sample_ratio = int(np.ceil(m / len(train_encoder_input)))
            all_encoder_input = (train_encoder_input * up_sample_ratio
                                 + synthetic_encoder_input)
            all_encoder_target = (train_encoder_target * up_sample_ratio
                                  + synthetic_encoder_target)

            # Train
            print('Train EPD')
            train_controller(self.model, all_encoder_input,
                             all_encoder_target, epochs)
            print('Finish training EPD')

    train_pred = np.squeeze(self.query(xtrain))
    train_error = np.mean(abs(train_pred - ytrain))
    return train_error

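# The up-sampling in the two fit() methods above relies on Python list
# repetition: repeating the labeled sequences `up_sample_ratio` times before
# concatenating the synthetic ones keeps the labeled data from being drowned
# out. A tiny worked example (all values illustrative, not from the library):
labeled = [[1, 2], [3, 4]]     # 2 labeled sequences
synthetic = [[5, 6]] * 20      # 20 synthetic sequences
up_sample_ratio = 10
combined = labeled * up_sample_ratio + synthetic
assert len(combined) == 40     # labeled and synthetic now contribute equally
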
def fit(self, xtrain, ytrain, train_info=None, num_layers=20,
        layer_width=20, loss='mae', epochs=500, batch_size=32, lr=.001,
        verbose=0, regularization=0.2):
    self.mean = np.mean(ytrain)
    self.std = np.std(ytrain)

    _xtrain = np.array([
        encode(arch, encoding_type=self.encoding_type, ss_type=self.ss_type)
        for arch in xtrain
    ])
    _ytrain = np.array(ytrain)

    X_tensor = torch.FloatTensor(_xtrain).to(device)
    y_tensor = torch.FloatTensor(_ytrain).to(device)
    train_data = TensorDataset(X_tensor, y_tensor)
    data_loader = DataLoader(train_data, batch_size=batch_size,
                             shuffle=True, drop_last=False,
                             pin_memory=False)

    # use the layer_width argument; the original hard-coded a width of 20,
    # silently ignoring the parameter
    self.model = self.get_model(input_dims=_xtrain.shape[1],
                                num_layers=num_layers,
                                layer_width=num_layers * [layer_width])
    self.model.to(device)
    optimizer = optim.Adam(self.model.parameters(), lr=lr, betas=(0.9, 0.99))
    if loss == 'mse':
        criterion = nn.MSELoss().to(device)
    elif loss == 'mae':
        criterion = nn.L1Loss().to(device)

    self.model.train()
    for e in range(epochs):
        meters = AverageMeterGroup()
        for b, batch in enumerate(data_loader):
            optimizer.zero_grad()
            input = batch[0].to(device)
            target = batch[1].to(device)
            prediction = self.model(input).view(-1)
            loss_fn = criterion(prediction, target)
            # add L1 regularization on the output layer weights
            params = torch.cat([
                x[1].view(-1) for x in self.model.named_parameters()
                if x[0] == 'out.weight'
            ])
            loss_fn += regularization * torch.norm(params, 1)
            loss_fn.backward()
            optimizer.step()
            mse = accuracy_mse(prediction, target)
            meters.update({"loss": loss_fn.item(), "mse": mse.item()},
                          n=target.size(0))
        if e % 100 == 0:
            print('Epoch {}, {}, {}'.format(e, meters['loss'], meters['mse']))

    train_pred = np.squeeze(self.query(xtrain))
    train_error = np.mean(abs(train_pred - ytrain))
    return train_error

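# A hypothetical round trip with the MLP predictor above (the names
# `predictor`, `archs_train`, `accs_train`, and `archs_test` are
# illustrative, not part of the library): fit on a labeled set, then query
# a held-out set with the matching query() defined earlier in this file.
# Sketch only, under the assumption that the class exposes exactly these
# fit()/query() signatures.
#
# train_error = predictor.fit(archs_train, accs_train,
#                             num_layers=20, layer_width=20, epochs=500)
# predictions = predictor.query(archs_test)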