Example #1
    def __call__(self, *args, **kwargs):
        # This is the main entrance to run configuration testing
        logger.debug("In the __call__ function")

        # Read through all samples in the input file
        while self.parser.has_next():
            logger.info('Starting new sample')

            # Read input from file
            self._read_next_input()

            batch_number = self.parser.samples_read() // self.batch_size

            logger.info('Testing configuration {}/{} for batch {}'.format(
                1, self.num_configurations, batch_number))
            self._test_configuration("random")

            logger.info('Testing configuration {}/{} for batch {}'.format(
                2, self.num_configurations, batch_number))
            self._test_configuration("sensitivity_analysis")

            # Run test for each configuration in the configuration list.
            for idx, config in enumerate(self.configurations[:1]):
                logger.info('Testing configuration {}/{} for batch {}'.format(
                    idx + 3, self.num_configurations, batch_number))
                self._test_configuration(config)

        logger.info("Done testing all of the input to the given file")
Example #2
    def _read_next_input(self):
        logger.debug("Reading input")

        self.features_read = self.parser.next_batch()
        logger.debug("Read labels: \n{}".format(self.features_read['label']))

        # Prepare separate graph for reading input
        graph = tf.Graph()
        with graph.as_default():
            *_, input_model, input_feed_dict = _construct_X_and_feed_dict(
                self.features_read)

            # Required variable in order to use tf.make_template()
            global_step = tf.Variable(0, name='global_step', trainable=False)

            logger.debug('Making sirs model for the input')

            self.input_model = sirs_classifier.create_model(
                None,
                global_step,
                input_model,
                False,
                INPUT_SIZE,
                CONTEXT_SIZE,
                NUM_CLASSES,
                USE_TEXT,
                use_char_autoencoder=USE_AUTOENCODER,
                use_doc2vec=USE_DOC2VEC)

            y_hat = self.input_model['y_hat']
            y_maxes = tf.reduce_max(y_hat, axis=2)
            y_argmax = tf.cast(tf.argmax(y_maxes, axis=1), tf.int32)

            self.input_model['offsets'] = tf.pad(
                tf.expand_dims(y_argmax, axis=1), [[0, 0], [3, 1]])

            with tf.Session() as s:
                s.run([tf.local_variables_initializer()])

                # Create a tf Saver that can be used to restore a pre-trained model below
                saver = tf.train.Saver()
                self.restore_checkpoint(s, saver)

                # Read the next input
                self.input_selection, y_hat = s.run(
                    [self.input_model['offsets'], self.input_model['y_hat']],
                    feed_dict=input_feed_dict)

        # Reduce max sequence length
        self.max_sequence = y_hat.shape[1]

        # Tell the parser that we read a batch
        self.parser.did_read_batch()

        logger.info("Read sample {} with label \n{}".format(
            self.parser.samples_read(), self.features_read['label']))
Example #3
    def __init__(self, dest) -> None:
        self._destination_folder = dest[:-1] if dest[-1] in ['/', '\\'] else dest

        i = 1
        destination = self._destination_folder
        while os.path.exists(destination):
            destination = "{}_{:02}".format(self._destination_folder, i)
            i += 1

        self._destination_folder = destination
        logger.info("Writing results to {}".format(self._destination_folder))
        os.makedirs(self._destination_folder)
Example #4
    def __init__(self, input_features, model, destination, batch_size,
                 **kwargs):
        # Remember the batch_size
        self.batch_size = batch_size
        self.confs = kwargs

        # Remember the file containing the model
        self.model_file = model

        # Dict used to hold the input elements (as tensors)
        self.features_batch = {
            'features': None,
            'context': None,
            'seq_len': None,
            'label': None,
            'forloeb': None,
        }

        # Result writer used to append benchmark results to files in the destination directory
        self.writer = ResultWriter(destination)

        # Gets all the different configurations
        self.configurations = get_configurations()

        # Count the configurations and add sensitivity analysis and random
        self.num_configurations = len(self.configurations) + 2

        self.parser = NpyFeatureParser(input_features, batch_size)

        logger.info("Testing {} samples from {}".format(
            self.parser.get_record_count(), input_features))
        logger.info("Writing results to folder: {}".format(destination))
        logger.info("Model used: {}".format(model))
        logger.info("Batch size: {}".format(batch_size))
Example #5
def _show_results(**kwargs):
    if kwargs['file']:
        scores = [ScoreParser(f) for f in kwargs['file']]
    else:
        scores = [
            ScoreParser(join(kwargs['benchmark_dir'], f))
            for f in listdir(kwargs['benchmark_dir'])
            if isfile(join(kwargs['benchmark_dir'], f)) and f.endswith(".res")
        ]

    scores.sort(key=lambda x: x.AOPC, reverse=True)
    to_print = [
        sc for sc in scores if sc.title in ['Sensitivity analysis', 'Random']
    ]
    rest = [
        sc for sc in scores
        if sc.title not in ['Sensitivity analysis', 'Random']
    ]

    to_print.extend(rest[:1])

    logger.info("------------ -   best    - -------------------------")
    for score in to_print:
        logger.info("\n" + str(score))

    if kwargs['plot']:
        write_scores_to_plot(to_print, **kwargs)

    logger.info("------------ -   worst    - -------------------------")
    logger.info("\n" + str(rest[-1]))
Example #6
def do_nan_searching(configs, selected_model, model_file, model_name,
                     **kwargs):
    iterations = kwargs['test_size'] // kwargs['batch_size']
    feed = DataFeed()

    start = kwargs['start']
    end = kwargs['end']
    if end == -1:
        end = len(configs)

    found_nans = 0
    for config_idx, config in enumerate(configs[start:end]):
        graph = tf.Graph()
        feed.reset_permutation()

        with graph.as_default():
            x = tf.placeholder(tf.float32, shape=[None, 784])
            y_ = tf.placeholder(tf.float32, shape=[None, 10])

            is_training = False

            y, _ = selected_model['nn'](x, y_, is_training)

            print("Testing ({}/{}), Nan-cnt {}: {}".format(
                config_idx, end - start, found_nans, config))
            explanation = lrp.lrp(x, y, config)

            init = get_initializer(False)

            important_variables = tf.trainable_variables()
            important_variables.extend(
                [v for v in tf.global_variables() if 'moving_' in v.name])

            saver = tf.train.Saver(important_variables)

            with tf.Session() as s:
                # Initialize stuff and restore model
                s.run(init)
                saver.restore(s, model_file)

                found_nans += do_search(config, s, feed, explanation,
                                        iterations, kwargs['batch_size'], x)
    logger.info(
        "Found nans for {} configurations in total for model {}".format(
            found_nans, model_name))
    print("Found nans for {} configurations in total for model {}".format(
        found_nans, model_name))
    return model_name, found_nans
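The helper do_search used above is not part of this snippet. Purely as a hypothetical sketch of the kind of check it might perform, here is a standalone NaN count over a batch of LRP explanations; the function name and shapes are assumptions, not the project's API:

import numpy as np

def count_nan_explanations(explanations):
    # Hypothetical helper: `explanations` is assumed to be an array of shape
    # (batch_size, num_features) produced by running the explanation op.
    # Returns the number of samples whose relevance scores contain a NaN.
    return int(np.sum(np.any(np.isnan(explanations), axis=1)))

# Example: two of the three samples contain a NaN relevance score
batch = np.array([[0.1, np.nan], [0.2, 0.3], [np.nan, np.nan]])
print(count_nan_explanations(batch))  # 2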
Example #7
    def _read_next_input(self):
        logger.debug("Starting input session")

        # Start queue runners for the reader queue to work
        tf.train.start_queue_runners(sess=self.input_session)

        # Read the next input
        self.features_read, self.input_selection, y_hat = self.input_session.run(
            [
                self.next_batch, self.input_model['offsets'],
                self.input_model['y_hat']
            ])

        # Reduce max sequence length
        self.max_sequence = y_hat.shape[1]

        # Tell the parser that we read a batch
        self.parser.did_read_batch()

        logger.info("Read sample {} with label \n{}".format(
            self.parser.samples_read(), self.features_read['label']))
Example #8
def _do_test(test_file, **kwargs):
    with tf.Graph().as_default():

        parser = FeatureParser(test_file, INPUT_SIZE, CONTEXT_SIZE, **kwargs)
        features = parser.next_batch()
        num_records = parser.get_record_count()

        # Keep scope empty
        sirs_model = tf.make_template('', _create_model)
        model = sirs_model(features)

        valid_scores = []
        scores = []
        for s in kwargs['scores']:
            if s in model:
                valid_scores.append(s)

                m_score = model[s]
                if s == 'accuracy':
                    m_score = tf.reduce_mean(tf.cast(m_score, tf.float32))
                scores.append(m_score)
            else:
                logger.error("Score '{}' is not supported.".format(s))

        with tf.Session() as sess:
            # Initialize variables
            sess.run([tf.local_variables_initializer()])

            # Restore model
            _restore_checkpoint(sess, kwargs['model'])

            # Find scores
            evaluations = _evaluate_model(sess, model, num_records, scores,
                                          **kwargs)

        # Print scores
        logger.info("Scores: ")
        for sc in zip(valid_scores, evaluations):
            print(sc)
            logger.info("{:<10}: {:10f}".format(*sc))
Example #9
    def write_result(self, config, label, prediction, results):
        file_name = "{}/{}.res".format(self._destination_folder, config)
        exists = os.path.isfile(file_name)

        label = label.reshape((label.shape[0], 1))
        prediction = prediction.reshape((prediction.shape[0], 1))
        ls = np.concatenate([label, prediction], axis=1)
        fmt = "{:>10d} " * 2 + "{:>10.6f} " * results.shape[2]

        with open(file_name, 'a') as f:
            if not exists:
                num_iterations = results.shape[2]
                str_format = "{:10} {:10} " + "{:>10} " * num_iterations
                headings = str_format.format(
                    'label', 'pred', *[i
                                       for i in range(num_iterations)]) + '\n'
                f.write(headings)

            for i, sample in enumerate(results):
                for row in sample:
                    f.write(fmt.format(*ls[i], *row) + "\n")
                f.write("\n")

        logger.info("Saved result for config {}".format(config))
Example #10
def run_model(selected_model_names, **kwargs):
    results = []
    for selected_model_name in selected_model_names:
        print("#" * 40)
        print("Searching in model: {}".format(selected_model_name.title()))

        selected_model = models[selected_model_name]

        configs = configurations.get_configurations_for_layers(
            *selected_model['confs'])

        model_file = '%s/%s.ckpt' % (model_dir, selected_model_name)

        res = do_nan_searching(configs,
                               selected_model,
                               model_file,
                               model_name=selected_model_name,
                               **kwargs)
        results.append(res)

    logger.info("Summary:")
    for r in results:
        logger.info("\t%s: %3i" % r)
        print("\t%s: %3i" % r)
Example #11
    def write_explanation(self, config, R):
        self._write_sparse_tensor('rel_{}'.format(config), R)
        logger.info("Saved relevances for batch")
Example #12
    def write_input(self, X):
        self._write_sparse_tensor('inputs', X)
        logger.info("Saved input batch")
Example #13
    def _test_configuration(self, config):
        # Start new graph for this configuration
        graph = tf.Graph()
        with graph.as_default():

            logger.debug(
                "Start of new test graph with config {}".format(config))

            X, X_reordered, self.features_batch, feed_dict = _construct_X_and_feed_dict(
                self.features_read)

            # Prepare template (that uses parameter sharing across calls to sirs_template)
            sirs_template = tf.make_template('', self.create_model)

            logger.debug("Building graph for forward pass")
            logger.debug(sirs_template)

            # Compute the DRN graph
            model = sirs_template(X_reordered)

            should_write_input = False
            if isinstance(config, str):
                # The config is either random or SA
                if config == 'random':
                    # Compute random relevances
                    logger.debug("Building random graph")
                    R = get_random_relevance(X)
                    should_write_input = True
                else:
                    # Compute sensitivity analysis
                    logger.debug("Building SA graph")
                    R = get_sensitivity_analysis(X, model['y_hat'])
            else:
                logger.debug('Building lrp graph')
                R = lrp.lrp(X, model['y_hat'], config)
                logger.debug('Done building lrp graph')

            logger.debug("Instantiating pertubation class")
            # Make pertuber for X and R that prepares a number of pertubations of X
            pertuber = Pertuber(X, R, self.batch_size, **self.confs)

            # Build the pertubation graph
            benchmark = pertuber.build_pertubation_graph(sirs_template)

            # Create a tf Saver that can be used to restore a pre-trained model below
            saver = tf.train.Saver()

            with tf.Session(graph=graph) as s:
                logger.debug("Initializing vars and restoring model")
                # Initialize the local variables and restore the model that was trained earlier
                s.run([tf.local_variables_initializer()])
                self.restore_checkpoint(s, saver)

                logger.debug("Restored model. Now starting threads.")

                # Create the threads that run the model
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(coord=coord, sess=s)

                try:
                    # Run the benchmarks. Shapes:
                    # Benchmark_result: batch_size, perturbations, num_classes
                    # y                 batch_size, 1
                    # y_hat             batch_size, num_classes
                    logger.debug("Starting session for benchmarks")
                    benchmark_result, expl, y, y_hat = self.run_model(
                        [benchmark, R, model['y'], model['y_hat']],
                        model,
                        feed_dict=feed_dict,
                        session=s)
                    logger.debug("Session done")

                    # Remove extra dimension from y
                    # y shape: (batch_size,)
                    y = y[:, 0]

                    # Find argmax for y_hat
                    # y_hat shape: (batch_size,)
                    y_hat = np.argmax(y_hat, axis=1)

                    # Write results to file
                    logger.debug("Writing result to file")
                    self.writer.write_result(config, y, y_hat,
                                             benchmark_result)

                    logger.debug("Writing explanation to file")
                    self.writer.write_explanation(config, expl)

                    if should_write_input:
                        logger.debug("Writing input to file")
                        self.writer.write_input(self.features_read['features'])

                except tf.errors.OutOfRangeError:
                    logger.debug("Done with the testing")
                except KeyboardInterrupt:
                    logger.debug("Process interrupted by user. Wrapping up.")
                finally:
                    coord.request_stop()

                logger.debug("Joining threads")
                coord.join(threads)
                logger.info("Done with test")
Example #14
    def __init__(self, input_features, model, destination, batch_size,
                 **kwargs):
        # Remember the batch_size
        self.batch_size = batch_size
        self.confs = kwargs

        # Remember the file containing the model
        self.model_file = model

        # Dict used to hold the input elements (as tensors)
        self.features_batch = {
            'features': None,
            'context': None,
            'seq_len': None,
            'label': None,
            'forloeb': None,
        }

        # Result writer used to append benchmark results to files in the destination directory
        self.writer = ResultWriter(destination)

        # Gets all the different configurations
        self.configurations = get_configurations()

        # Count the configurations and add sensitivity analysis and random
        self.num_configurations = len(self.configurations) + 2

        # Prepare separate graph for reading input
        self.input_graph = tf.Graph()
        with self.input_graph.as_default():
            self.parser = FeatureParser(input_features, INPUT_SIZE,
                                        CONTEXT_SIZE, batch_size)
            self.next_batch = self.parser.next_batch()

            # Required variable in order to use tf.make_template()
            global_step = tf.Variable(0, name='global_step', trainable=False)

            logger.debug('Making sirs model for the input')

            self.input_model = sirs_classifier.create_model(
                None,
                global_step,
                self.next_batch,
                False,
                INPUT_SIZE,
                CONTEXT_SIZE,
                NUM_CLASSES,
                USE_TEXT,
                use_char_autoencoder=USE_AUTOENCODER,
                use_doc2vec=USE_DOC2VEC)

            y_hat = self.input_model['y_hat']
            y_maxes = tf.reduce_max(y_hat, axis=2)
            y_argmax = tf.cast(tf.argmax(y_maxes, axis=1), tf.int32)

            self.input_model['offsets'] = tf.pad(
                tf.expand_dims(y_argmax, axis=1), [[0, 0], [3, 1]])

            self.input_session = tf.Session(graph=self.input_graph)
            self.input_session.run([tf.local_variables_initializer()])

            # Create a tf Saver that can be used to restore a pre-trained model below
            saver = tf.train.Saver()
            self.restore_checkpoint(self.input_session, saver)

        logger.info("Testing {} samples from {}".format(
            self.parser.get_record_count(), input_features))
        logger.info("Writing results to folder: {}".format(destination))
        logger.info("Model used: {}".format(model))
        logger.info("Batch size: {}".format(batch_size))
Example #15
    def _test_configuration(self, config):
        # Start new graph for this configuration
        graph = tf.Graph()
        with graph.as_default():

            logger.debug(
                "Start of new test graph with config {}".format(config))

            # Dictionary to hold all the 'feed_dict' parameters for session.run
            to_feed = dict()

            # Construct sparse tensors from placeholders
            # X read from the input (this is a SparseTensorValue , i.e. numpy like)
            X_read = self.features_read['features']

            # Placeholders to reconstruct X in this new graph
            X_indices = tf.placeholder(tf.int64, X_read.indices.shape)
            X_values = tf.placeholder(tf.float32, X_read.values.shape)
            X_shape = tf.placeholder(tf.int64, np.size(X_read.dense_shape))

            X = tf.SparseTensor(X_indices, X_values, X_shape)

            # Do sparse reorder to ensure that LRP (and other sparse matrix operations) works
            X_reordered = tf.sparse_reorder(X)

            # Fill actual values into the three placeholders above
            to_feed[X_indices] = X_read.indices
            to_feed[X_values] = X_read.values
            to_feed[X_shape] = X_read.dense_shape

            # Do the same sparse tensor reconstruction trick for the context
            # C read from the input (this is a SparseTensorValue, i.e. numpy like)
            C_read = self.features_read['context']

            # Placeholders to reconstruct C in this new graph
            C_indices = tf.placeholder(tf.int64, C_read.indices.shape)
            C_values = tf.placeholder(tf.float32, C_read.values.shape)
            C_shape = tf.placeholder(tf.int64, np.size(C_read.dense_shape))

            C = tf.SparseTensor(C_indices, C_values, C_shape)

            # Fill actual values into the three placeholders for C
            to_feed[C_indices] = C_read.indices
            to_feed[C_values] = C_read.values
            to_feed[C_shape] = C_read.dense_shape

            # Store sparse context tensor
            self.features_batch['context'] = C

            # Same circus for seq_len
            seq_len = tf.placeholder(tf.int64, (None, ))
            self.features_batch['seq_len'] = seq_len

            # Same circus for label
            label = tf.placeholder(tf.int64, self.features_read['label'].shape)
            self.features_batch['label'] = label

            # Same circus for forloeb
            forloeb = tf.placeholder(tf.int64,
                                     self.features_read['forloeb'].shape)
            self.features_batch['forloeb'] = forloeb

            # Fill actual values into seq_len, label, forloeb
            to_feed[seq_len] = self.features_read['seq_len']
            to_feed[label] = self.features_read['label']
            to_feed[forloeb] = self.features_read['forloeb']

            # Prepare template (that uses parameter sharing across calls to sirs_template)
            self.first_template_use = True
            sirs_template = tf.make_template('', self.create_model)

            logger.debug("Building graph for forward pass")
            # Compute the DRN graph
            model = sirs_template(X_reordered)

            should_write_input = False
            if isinstance(config, str):
                # The config is either random or SA
                if config == 'random':
                    # Compute random relevances
                    logger.debug("Building random graph")
                    R = get_random_relevance(X)
                    should_write_input = True
                else:
                    # Compute sensitivity analysis
                    logger.debug("Building SA graph")
                    R = get_sensitivity_analysis(X, model['y_hat'])
            else:
                logger.debug('Building lrp graph')
                R = lrp.lrp(X, model['y_hat'], config)
                logger.debug('Done building lrp graph')

            logger.debug("Instantiating pertubation class")
            # Make pertuber for X and R that prepares a number of pertubations of X
            pertuber = Pertuber(X, R, self.batch_size, **self.confs)

            # Build the pertubation graph
            benchmark = pertuber.build_pertubation_graph(sirs_template)

            # Create a tf Saver that can be used to restore a pre-trained model below
            saver = tf.train.Saver()

            with tf.Session(graph=graph) as s:
                logger.debug("Initializing vars and restoring model")
                # Initialize the local variables and restore the model that was trained earlier
                s.run([tf.local_variables_initializer()])
                self.restore_checkpoint(s, saver)

                logger.debug("Restored model. Now starting threads.")

                # Create the threads that run the model
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(coord=coord, sess=s)

                try:
                    # Run the benchmarks. Shapes:
                    # Benchmark_result: batch_size, perturbations, num_classes
                    # y                 batch_size, 1
                    # y_hat             batch_size, num_classes
                    logger.debug("Starting session for benchmarks")
                    benchmark_result, expl, y, y_hat = self.run_model(
                        [benchmark, R, model['y'], model['y_hat']],
                        model,
                        feed_dict=to_feed,
                        session=s)
                    logger.debug("Session done")

                    # Remove extra dimension from y
                    # y shape: (batch_size,)
                    y = y[:, 0]

                    # Find argmax for y_hat
                    # y_hat shape: (batch_size,)
                    y_hat = np.argmax(y_hat, axis=1)

                    # Write results to file
                    logger.debug("Writing result to file")
                    self.writer.write_result(config, y, y_hat,
                                             benchmark_result)

                    logger.debug("Writing explanation to file")
                    self.writer.write_explanation(config, expl)

                    if should_write_input:
                        logger.debug("Writing input to file")
                        self.writer.write_input(X_read)

                except tf.errors.OutOfRangeError:
                    logger.debug("Done with the testing")
                except KeyboardInterrupt:
                    logger.debug("Process interrupted by user. Wrapping up.")
                finally:
                    coord.request_stop()

                logger.debug("Joining threads")
                coord.join(threads)
                logger.info("Done with test")