def test_corrtrain_correlation_multi(self):
    """
    Artificial example to test AICorrNet and trace processing with multiple
    leakage values and multiple subkeys.
    """
    from leakagemodels import LeakageModel

    # ------------------------------
    # Generate data
    # ------------------------------
    traces = [  # Contains abs(trace). Shape = [trace, point]
        [1, 1, 1, -15],
        [-4, 2, 2, -12],
        [10, 3, 3, 8],
        [8, 1, 1, -14],
        [9, 0, -3, 8],
    ]

    plaintexts = [
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 13, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    ]

    keys = [
        [0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    ]

    # Convert to numpy
    traces = np.array(traces)
    plaintexts = np.array(plaintexts)
    keys = np.array(keys)
    trace_set = TraceSet(name='test', traces=traces, plaintexts=plaintexts, keys=keys)

    # ------------------------------
    # Preprocess data
    # ------------------------------
    conf = Namespace(
        max_cache=0,
        augment_roll=False,
        augment_noise=False,
        normalize=False,
        traces_per_set=4,
        online=False,
        dataset_id='qa',
        cnn=False,
        leakage_model=LeakageModelType.AES_MULTI,
        input_type=AIInputType.SIGNAL,
        augment_shuffle=True,
        n_hidden_layers=1,
        n_hidden_nodes=256,
        activation='leakyrelu',
        metric_freq=100,
        regularizer=None,
        reglambda=0.001,
        model_suffix=None,
        use_bias=True,
        batch_norm=True,
        hamming=False,
        key_low=1,
        key_high=3,
        loss_type='correlation',
        lr=0.001,
        epochs=5000,
        batch_size=512,
        norank=False,
    )
    it_dummy = AICorrSignalIterator([], conf, batch_size=10000, request_id=None, stream_server=None)
    x, y = it_dummy._preprocess_trace_set(trace_set)

    # ------------------------------
    # Train and obtain encodings
    # ------------------------------
    model = ai.AICorrNet(conf, input_dim=4, name="test")
    print(model.info())
    rank_cb = rank.CorrRankCallback(conf, '/tmp/deleteme/', save_best=False, save_path=None)
    rank_cb.set_trace_set(trace_set)

    if model.using_regularization:
        print("Warning: can't run the correlation loss test because the regularizer would influence the loss function")
        return

    # Find optimal weights
    print("The x (EM samples) and y (leakage model values) are:")
    print(x)
    print(y)
    print("When feeding x through the model without training, the encodings become:")
    print(model.predict(x))
    print("Training now")
    model.train_set(x, y, save=False, epochs=conf.epochs, extra_callbacks=[rank_cb])
    print("Done training")

    # Get the encodings of the input data using the same approach used in
    # ops.py corrtest (iterate over rows). Result contains the sum of points
    # such that corr with y[key_index] is maximal for all key indices.
    # Shape = [trace, 16]
    result = []
    for i in range(0, x.shape[0]):
        result.append(model.predict(np.array([x[i, :]], dtype=float))[0])
    result = np.array(result)
    print("When feeding x through the model after training, the encodings for key bytes %d to %d become:\n %s" % (conf.key_low, conf.key_high, str(result)))

    # ------------------------------
    # Check loss function
    # ------------------------------
    # Evaluate the model to get the loss for the encodings
    predicted_loss = model.model.evaluate(x, y, verbose=0)

    # Manually calculate the loss using numpy to verify that we are learning a correct correlation
    calculated_loss = 0.0
    num_keys = conf.key_high - conf.key_low
    num_outputs = LeakageModel.get_num_outputs(conf) // num_keys
    for i in range(0, num_keys):
        subkey_hws = y[:, i * num_outputs:(i + 1) * num_outputs]
        subkey_encodings = result[:, i * num_outputs:(i + 1) * num_outputs]
        print("Subkey %d HWs      : %s" % (i + conf.key_low, str(subkey_hws)))
        print("Subkey %d encodings: %s" % (i + conf.key_low, str(subkey_encodings)))
        y_key = subkey_hws.reshape([-1, 1])
        y_pred = subkey_encodings.reshape([-1, 1])
        print("Flattened subkey %d HWs      : %s" % (i + conf.key_low, str(y_key)))
        print("Flattened subkey %d encodings: %s" % (i + conf.key_low, str(y_pred)))

        # Calculate correlation (numpy approach)
        corr_key_i = np.corrcoef(y_pred[:, 0], y_key[:, 0], rowvar=False)[1, 0]
        print("corr_num: %s" % corr_key_i)

        calculated_loss += 1.0 - corr_key_i

    print("These values should be close:")
    print("Predicted loss: %s" % str(predicted_loss))
    print("Calculated loss: %s" % str(calculated_loss))
    self.assertAlmostEqual(predicted_loss, calculated_loss, places=2)
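
# A minimal standalone sketch of the numpy-side check performed in the test
# above: the correlation loss is taken to be the sum over subkeys of
# (1 - Pearson r) between the flattened leakage values and the flattened
# encodings. The function name and signature are illustrative only, not an
# EMMA API.
import numpy as np  # assumed to match the module-level import used above


def correlation_loss_reference(y_true, y_pred, num_keys, num_outputs):
    """Reference (1 - corr) loss summed over subkeys, computed with numpy.

    y_true, y_pred: arrays of shape [n_traces, num_keys * num_outputs].
    """
    loss = 0.0
    for i in range(num_keys):
        # Flatten the leakage values and encodings of subkey i, as the test does
        t = y_true[:, i * num_outputs:(i + 1) * num_outputs].reshape(-1)
        p = y_pred[:, i * num_outputs:(i + 1) * num_outputs].reshape(-1)
        loss += 1.0 - np.corrcoef(p, t)[1, 0]
    return loss
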
def aitrain(self, training_trace_set_paths, validation_trace_set_paths, conf):
    resolve_paths(training_trace_set_paths)    # Get absolute paths for training set
    resolve_paths(validation_trace_set_paths)  # Get absolute paths for validation set

    # Hardcoded stuff
    subtype = 'custom'

    # Determine type of model to train
    model_type = get_conf_model_type(conf)  # TODO: Refactor 'name' to 'model_type' everywhere and let user specify modeltype in [] params of "train" activity

    # Select training iterator (gathers data, performs augmentation and preprocessing)
    training_iterator, validation_iterator = aiiterators.get_iterators_for_model(
        model_type,
        training_trace_set_paths,
        validation_trace_set_paths,
        conf,
        hamming=conf.hamming,
        subtype=subtype,
        request_id=self.request.id)

    print("Getting shape of data...")
    x, _ = training_iterator.next()
    input_shape = x.shape[1:]  # Shape excluding batch
    print("Shape of data to train: %s" % str(input_shape))

    # Select model
    model = None
    if conf.update or conf.testrank:  # Load existing model to update or test
        model = ai.AI(conf, model_type)
        model.load()
    else:  # Create new model
        if model_type == 'aicorrnet':
            model = ai.AICorrNet(conf, input_dim=input_shape[0])
        elif model_type == 'aishacpu':
            model = ai.AISHACPU(conf, input_shape=input_shape, subtype=subtype)
        elif model_type == 'aishacc':
            model = ai.AISHACC(conf, input_shape=input_shape)
        elif model_type == 'aiascad':
            model = ai.AIASCAD(conf, input_shape=input_shape)
        elif model_type == 'autoenc':
            model = ai.AutoEncoder(conf, input_dim=input_shape[0])
        else:
            raise EMMAException("Unknown model type %s" % model_type)
    logger.info(model.info())

    if conf.tfold:  # Train t times and generate t-fold rank summary
        model.train_t_fold(training_iterator,
                           batch_size=conf.batch_size,
                           epochs=conf.epochs,
                           num_train_traces=45000,
                           t=10,
                           rank_trace_step=10,
                           conf=conf)
    elif conf.testrank:  # TODO: this should not be in aitrain; refactor
        model.test_fold(validation_iterator, rank_trace_step=10, conf=conf, max_traces=5000)
    else:  # Train once
        model.train_generator(training_iterator,
                              validation_iterator,
                              epochs=conf.epochs,
                              workers=1)
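
# Illustrative sketch only: the model-selection branch of aitrain(), rewritten
# as a lookup table. The ai.* constructors and their keyword arguments are
# copied verbatim from aitrain() above; it assumes the same module-level ai
# and EMMAException names. Nothing here is called by the real code path.
MODEL_FACTORIES = {
    'aicorrnet': lambda conf, shape, subtype: ai.AICorrNet(conf, input_dim=shape[0]),
    'aishacpu': lambda conf, shape, subtype: ai.AISHACPU(conf, input_shape=shape, subtype=subtype),
    'aishacc': lambda conf, shape, subtype: ai.AISHACC(conf, input_shape=shape),
    'aiascad': lambda conf, shape, subtype: ai.AIASCAD(conf, input_shape=shape),
    'autoenc': lambda conf, shape, subtype: ai.AutoEncoder(conf, input_dim=shape[0]),
}


def make_new_model(model_type, conf, input_shape, subtype='custom'):
    """Hypothetical helper: build a fresh model for model_type or raise."""
    try:
        return MODEL_FACTORIES[model_type](conf, input_shape, subtype)
    except KeyError:
        raise EMMAException("Unknown model type %s" % model_type)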