Example 1
    def test_corrtrain_correlation_multi(self):
        """
        Artificial example to test AICorrNet and trace processing with multiple leakage values and multiple subkeys.
        """
        from leakagemodels import LeakageModel

        # ------------------------------
        # Generate data
        # ------------------------------
        traces = [  # Contains abs(trace). Shape = [trace, point]
            [1, 1, 1, -15],
            [-4, 2, 2, -12],
            [10, 3, 3, 8],
            [8, 1, 1, -14],
            [9, 0, -3, 8],
        ]

        plaintexts = [
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 13, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        ]

        keys = [
            [0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        ]

        # Convert to numpy
        traces = np.array(traces)
        plaintexts = np.array(plaintexts)
        keys = np.array(keys)

        trace_set = TraceSet(name='test',
                             traces=traces,
                             plaintexts=plaintexts,
                             keys=keys)

        # ------------------------------
        # Preprocess data
        # ------------------------------
        conf = Namespace(
            max_cache=0,
            augment_roll=False,
            augment_noise=False,
            normalize=False,
            traces_per_set=4,
            online=False,
            dataset_id='qa',
            cnn=False,
            leakage_model=LeakageModelType.AES_MULTI,
            input_type=AIInputType.SIGNAL,
            augment_shuffle=True,
            n_hidden_layers=1,
            n_hidden_nodes=256,
            activation='leakyrelu',
            metric_freq=100,
            regularizer=None,
            reglambda=0.001,
            model_suffix=None,
            use_bias=True,
            batch_norm=True,
            hamming=False,
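            # Attack key bytes in the range [key_low, key_high), i.e. subkeys 1 and 2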
            key_low=1,
            key_high=3,
            loss_type='correlation',
            lr=0.001,
            epochs=5000,
            batch_size=512,
            norank=False,
        )
        it_dummy = AICorrSignalIterator([],
                                        conf,
                                        batch_size=10000,
                                        request_id=None,
                                        stream_server=None)
        x, y = it_dummy._preprocess_trace_set(trace_set)
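        # x holds the preprocessed trace signals (one row per trace); y holds the
        # corresponding leakage model values for each attacked subkey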

        # ------------------------------
        # Train and obtain encodings
        # ------------------------------
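        # input_dim=4 matches the number of points in each trace above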
        model = ai.AICorrNet(conf, input_dim=4, name="test")
        print(model.info())
        rank_cb = rank.CorrRankCallback(conf,
                                        '/tmp/deleteme/',
                                        save_best=False,
                                        save_path=None)
        rank_cb.set_trace_set(trace_set)

        if model.using_regularization:
            print(
                "Warning: cant do correlation loss test because regularizer will influence loss function"
            )
            return

        # Find optimal weights
        print("The x (EM samples) and y (leakage model values) are:")
        print(x)
        print(y)
        print(
            "When feeding x through the model without training, the encodings become:"
        )
        print(model.predict(x))
        print("Training now")
        model.train_set(x,
                        y,
                        save=False,
                        epochs=conf.epochs,
                        extra_callbacks=[rank_cb])
        print("Done training")

        # Get the encodings of the input data using the same approach used in ops.py corrtest (iterate over rows)
        # result contains the sum of points such that the correlation with
        # y[key_index] is maximal for all key indices. Shape = [trace, 16]
        result = []
        for i in range(x.shape[0]):
            result.append(model.predict(np.array([x[i, :]], dtype=float))[0])
        result = np.array(result)
        print(
            "When feeding x through the model after training, the encodings for key bytes %d to %d become:\n %s"
            % (conf.key_low, conf.key_high, str(result)))

        # ------------------------------
        # Check loss function
        # ------------------------------
        # Evaluate the model to get the loss for the encodings
        predicted_loss = model.model.evaluate(x, y, verbose=0)

        # Manually calculate the loss using numpy to verify that we are learning a correct correlation
        calculated_loss = 0
        num_keys = (conf.key_high - conf.key_low)
        num_outputs = LeakageModel.get_num_outputs(conf) // num_keys
        for i in range(num_keys):
            subkey_hws = y[:, i * num_outputs:(i + 1) * num_outputs]
            subkey_encodings = result[:, i * num_outputs:(i + 1) * num_outputs]
            print("Subkey %d HWs   : %s" % (i + conf.key_low, str(subkey_hws)))
            print("Subkey %d encodings: %s" %
                  (i + conf.key_low, str(subkey_encodings)))
            y_key = subkey_hws.reshape([-1, 1])
            y_pred = subkey_encodings.reshape([-1, 1])
            print("Flattened subkey %d HWs   : %s" %
                  (i + conf.key_low, str(y_key)))
            print("Flattened subkey %d encodings: %s" %
                  (i + conf.key_low, str(y_pred)))

            # Calculate correlation (numpy approach)
            corr_key_i = np.corrcoef(y_pred[:, 0], y_key[:, 0],
                                     rowvar=False)[1, 0]
            print("corr_num: %s" % corr_key_i)

            calculated_loss += 1.0 - corr_key_i

        print("These values should be close:")
        print("Predicted loss: %s" % str(predicted_loss))
        print("Calculated loss: %s" % str(calculated_loss))
        self.assertAlmostEqual(predicted_loss, calculated_loss, places=2)
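For reference, the manual loss check above reduces to an ordinary Pearson correlation per subkey. Below is a minimal standalone sketch of the same computation using only numpy; the array values and shapes here are illustrative assumptions, not data from the test:

import numpy as np

# Toy leakage values and encodings for two subkeys with two outputs each.
# Column blocks correspond to subkeys; values are illustrative only.
y_true = np.array([[1.0, 2.0, 0.0, 1.0],
                   [2.0, 4.0, 1.0, 3.0],
                   [3.0, 6.0, 2.0, 5.0]])
y_pred = np.array([[0.9, 2.1, 0.2, 1.1],
                   [2.2, 3.9, 0.9, 2.8],
                   [3.1, 6.2, 2.1, 5.2]])

num_keys = 2
num_outputs = y_true.shape[1] // num_keys

loss = 0.0
for i in range(num_keys):
    t = y_true[:, i * num_outputs:(i + 1) * num_outputs].reshape(-1)
    p = y_pred[:, i * num_outputs:(i + 1) * num_outputs].reshape(-1)
    corr = np.corrcoef(p, t)[1, 0]  # Pearson correlation for this subkey
    loss += 1.0 - corr              # perfect correlation gives zero loss

print("correlation loss: %f" % loss)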
Example 2
def aitrain(self, training_trace_set_paths, validation_trace_set_paths, conf):
    resolve_paths(training_trace_set_paths)  # Get absolute paths for the training set
    resolve_paths(validation_trace_set_paths)  # Get absolute paths for the validation set

    # Hardcoded model subtype
    subtype = 'custom'

    # Determine type of model to train
    model_type = get_conf_model_type(conf)  # TODO: Refactor 'name' to 'model_type' everywhere and let user specify modeltype in [] params of "train" activity

    # Select training iterator (gathers data, performs augmentation and preprocessing)
    training_iterator, validation_iterator = aiiterators.get_iterators_for_model(
        model_type,
        training_trace_set_paths,
        validation_trace_set_paths,
        conf,
        hamming=conf.hamming,
        subtype=subtype,
        request_id=self.request.id)

    print("Getting shape of data...")
    x, _ = training_iterator.next()
    input_shape = x.shape[1:]  # Shape excluding batch
    print("Shape of data to train: %s" % str(input_shape))

    # Select model
    model = None
    if conf.update or conf.testrank:  # Load existing model to update or test
        model = ai.AI(conf, model_type)
        model.load()
    else:  # Create new model
        if model_type == 'aicorrnet':
            model = ai.AICorrNet(conf, input_dim=input_shape[0])
        elif model_type == 'aishacpu':
            model = ai.AISHACPU(conf, input_shape=input_shape, subtype=subtype)
        elif model_type == 'aishacc':
            model = ai.AISHACC(conf, input_shape=input_shape)
        elif model_type == 'aiascad':
            model = ai.AIASCAD(conf, input_shape=input_shape)
        elif model_type == 'autoenc':
            model = ai.AutoEncoder(conf, input_dim=input_shape[0])
        else:
            raise EMMAException("Unknown model type %s" % model_type)
    logger.info(model.info())

    if conf.tfold:  # Train t times and generate tfold rank summary
        model.train_t_fold(training_iterator,
                           batch_size=conf.batch_size,
                           epochs=conf.epochs,
                           num_train_traces=45000,
                           t=10,
                           rank_trace_step=10,
                           conf=conf)
    elif conf.testrank:  # TODO this should not be in aitrain; refactor
        model.test_fold(validation_iterator,
                        rank_trace_step=10,
                        conf=conf,
                        max_traces=5000)
    else:  # Train once
        model.train_generator(training_iterator,
                              validation_iterator,
                              epochs=conf.epochs,
                              workers=1)
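The if/elif chain above that instantiates the model could also be written as a dispatch table. Below is a sketch of that alternative, assuming the same ai, conf, input_shape, subtype, and model_type bindings as in aitrain; it is an illustrative refactoring, not code from the project:

# Hypothetical table-driven equivalent of the model-selection branches.
model_builders = {
    'aicorrnet': lambda: ai.AICorrNet(conf, input_dim=input_shape[0]),
    'aishacpu': lambda: ai.AISHACPU(conf, input_shape=input_shape, subtype=subtype),
    'aishacc': lambda: ai.AISHACC(conf, input_shape=input_shape),
    'aiascad': lambda: ai.AIASCAD(conf, input_shape=input_shape),
    'autoenc': lambda: ai.AutoEncoder(conf, input_dim=input_shape[0]),
}
if model_type not in model_builders:
    raise EMMAException("Unknown model type %s" % model_type)
model = model_builders[model_type]()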