def eval_epoch(model, data, config):
    model.eval()
    epoch_x, epoch_y, lengths_x = get_epoch(data["valid_x"], data["valid_y"],
                                            config["batch_size"], is_train=False)
    epoch_loss = 0
    corrects = 0
    criterion = nn.CrossEntropyLoss()

    for batch_x, batch_y, length_x in zip(epoch_x, epoch_y, lengths_x):
        batch_x = torch.LongTensor(batch_x)
        batch_y = torch.LongTensor(batch_y)
        # Use a new name here; reassigning `lengths_x` would shadow the epoch-level list.
        batch_lengths = torch.LongTensor(length_x)

        if config["cuda"]:
            batch_x, batch_y, batch_lengths = batch_x.cuda(), batch_y.cuda(), batch_lengths.cuda()

        pred = model(batch_x)['logits']
        loss = criterion(pred, batch_y)
        epoch_loss += float(loss)

        batch_corrects = int((torch.max(pred, 1)[1].view(batch_y.size()).data == batch_y.data).sum())
        corrects += batch_corrects

        del batch_x, batch_y, pred, loss

    # Per-batch mean losses are summed, then normalized by the dataset size;
    # accuracy is returned as a percentage.
    return epoch_loss / len(data["valid_y"]), corrects / len(data["valid_y"]) * 100
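
# The corrects computation above, factored out as a self-contained sketch
# (an editor's illustration, not part of the original file): `logits` is
# (batch, num_classes) and `targets` is (batch,), so the argmax over dim 1
# gives the predicted class for each example.
def count_corrects(logits, targets):
    preds = torch.max(logits, 1)[1].view(targets.size())
    return int((preds.data == targets.data).sum())

# e.g. count_corrects(torch.tensor([[0.1, 2.0]]), torch.tensor([1])) == 1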
def eval_epoch_with_thresholds(model, data, config, thresholds):
    """
    Evaluate the thresholded model on the validation set
    """
    model.eval()
    epoch_x, epoch_y, lengths_x = get_epoch(data["valid_x"],
                                            data["valid_y"],
                                            config["batch_size"],
                                            is_train=False)
    epoch_loss = 0
    corrects = 0
    criterion = nn.CrossEntropyLoss()

    thresh = [
        torch.FloatTensor(thresh_row)
        for thresh_row in thresholds["thresholds"]
    ]
    if config["cuda"]:
        thresh = [t.cuda() for t in thresh]

    for batch_x, batch_y, length_x in zip(epoch_x, epoch_y, lengths_x):
        batch_x = torch.LongTensor(batch_x)
        batch_y = torch.LongTensor(batch_y)
        batch_lengths = torch.LongTensor(length_x)

        if config["cuda"]:
            batch_x, batch_y, batch_lengths = (batch_x.cuda(), batch_y.cuda(),
                                               batch_lengths.cuda())

        pred = model(batch_x, thresh)['logits']
        loss = criterion(pred, batch_y)
        epoch_loss += float(loss)

        batch_corrects = int(
            (torch.max(pred, 1)[1].view(batch_y.size()).data == batch_y.data).sum())
        corrects += batch_corrects

        del batch_x, batch_y, pred, loss

    return (epoch_loss / len(data["valid_y"]),
            corrects / len(data["valid_y"]) * 100)
def run_adversary_attack(model, data, config, advers_attack):

    model.eval()
    # Batch size must be 1: the per-example logic below inspects index 0 only.
    epoch_x, epoch_y, lengths_x = get_epoch(data["valid_x"],
                                            data["valid_y"],
                                            1,
                                            is_train=False)

    # For each prediction type, count how many examples the adversarial
    # attack changed ([0]) and left unchanged ([1]).
    results = {
        "TP": [0, 0],
        "TN": [0, 0],
        "FP": [0, 0],
        "FN": [0, 0],
    }

    results_extended = []

    for batch_x, batch_y, length_x in zip(epoch_x, epoch_y, lengths_x):
        # Keep plain-list copies so the adversarial suffix can be appended below.
        batch_x_orig = batch_x.copy()
        length_x_orig = length_x.copy()

        batch_x = torch.LongTensor(batch_x)
        batch_y = torch.LongTensor(batch_y)
        batch_lengths = torch.LongTensor(length_x)

        if config["cuda"]:
            batch_x, batch_y, batch_lengths = (batch_x.cuda(), batch_y.cuda(),
                                               batch_lengths.cuda())

        pred = model(batch_x)['logits']
        pred_class = torch.max(pred, 1)[1].view(batch_y.size()).data
        batch_y_orig = batch_y.clone()
        if config["cuda"]:
            pred_class = pred_class.cpu().detach().numpy()
            batch_y = batch_y.cpu().detach().numpy()

        # NOTE: the logic below assumes batch size 1 (only index 0 is inspected).
        TYPE = ""
        if pred_class[0] == 1 and batch_y[0] == 1:
            TYPE = "TP"
        elif pred_class[0] == 0 and batch_y[0] == 0:
            TYPE = "TN"
        elif pred_class[0] == 1 and batch_y[0] == 0:
            TYPE = "FP"
        elif pred_class[0] == 0 and batch_y[0] == 1:
            TYPE = "FN"

        # Originally the attack ran only on true negatives (TYPE == "TN"),
        # i.e. samples the model correctly assigns to class 0; it is now
        # applied to every example regardless of prediction type.
        if True:
            batch_x_advers_orig = [
                a + advers_attack[TYPE] for a in batch_x_orig
            ]
            length_x_advers_orig = [
                a + len(advers_attack[TYPE]) for a in length_x_orig
            ]

            batch_x_advers = torch.LongTensor(batch_x_advers_orig)
            lengths_x_advers = torch.LongTensor(length_x_advers_orig)

            if config["cuda"]:
                batch_x_advers, lengths_x_advers = batch_x_advers.cuda(
                ), lengths_x_advers.cuda()

            pred_advers = model(batch_x_advers)['logits']
            pred_class_advers = torch.max(pred_advers, 1)[1].view(
                batch_y_orig.size()).data
            if config["cuda"]:
                pred_class_advers = pred_class_advers.cpu().detach().numpy()

            if pred_class[0] == pred_class_advers[0]:
                results[TYPE][1] += 1  # prediction unchanged by the attack
            else:
                results[TYPE][0] += 1  # prediction flipped

            results_extended.append([
                batch_y[0], pred_class[0], pred_class_advers[0],
                length_x_orig[0]
            ])

            # Flip to True to print original/adversarial sentence pairs for debugging.
            if False:
                print(
                    "Original sentence : ",
                    " ".join([data["idx_to_word"][a]
                              for a in batch_x_orig[0]]))
                print(
                    "Adversarial sentence : ", " ".join([
                        data["idx_to_word"][a] for a in batch_x_advers_orig[0]
                    ]))
                print("Truth {}, Pred {}, Adversarial {}".format(
                    batch_y[0], pred_class[0], pred_class_advers[0]))

    return results, results_extended
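
# A reporting sketch over the `results` dict returned above (a hypothetical
# helper, not in the original). Each entry holds [changed, unchanged] counts;
# `advers_attack` itself is assumed to map each prediction type
# ("TP"/"TN"/"FP"/"FN") to a list of word indices appended to the input.
def summarize_attack(results):
    for type_name, (changed, unchanged) in results.items():
        total = changed + unchanged
        rate = changed / total if total else 0.0
        print("{}: {}/{} predictions flipped ({:.1%})".format(
            type_name, changed, total, rate))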
def main(_):

    config = tf.ConfigProto(inter_op_parallelism_threads=num_inter_op_threads,
                            intra_op_parallelism_threads=num_intra_op_threads)

    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()  # For Tensorflow trace

    cluster = tf.train.ClusterSpec({"ps": ps_list, "worker": worker_list})
    server = tf.train.Server(cluster, job_name=job_name, task_index=task_index)

    is_sync = (FLAGS.is_sync == 1)  # Synchronous or asynchronous updates
    is_chief = (task_index == 0)  # Am I the chief node (always task 0)

    greedy = tf.contrib.training.GreedyLoadBalancingStrategy(
        num_tasks=len(ps_hosts), load_fn=tf.contrib.training.byte_size_load_fn)

    if job_name == "ps":

        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:ps/task:{}".format(task_index),
                    ps_tasks=len(ps_hosts),
                    ps_strategy=greedy,
                    cluster=cluster)):

            sess = tf.Session(server.target, config=config)
            queue = create_done_queue(task_index)

            print("*" * 30)
            print("\nParameter server #{} on {}.\n\n" \
             "Waiting on workers to finish.\n\nPress CTRL-\\ to terminate early.\n"  \
             .format(task_index, ps_hosts[task_index]))
            print("*" * 30)

            # wait until all workers are done
            for i in range(len(worker_hosts)):
                sess.run(queue.dequeue())
                print("Worker #{} reports job finished.".format(i))

            print("Parameter server #{} is quitting".format(task_index))
            print("Training complete.")

    elif job_name == "worker":

        if is_chief:
            print("I am chief worker {} with task #{}".format(
                worker_hosts[task_index], task_index))
        else:
            print("I am worker {} with task #{}".format(
                worker_hosts[task_index], task_index))

        if len(ps_list) > 0:
            setDevice = tf.train.replica_device_setter(
                worker_device="/job:worker/task:{}".format(task_index),
                ps_tasks=len(ps_hosts),
                ps_strategy=greedy,
                cluster=cluster)
        else:
            setDevice = "/cpu:0"  # No parameter server so put variables on chief worker

        with tf.device(setDevice):

            global_step = tf.Variable(0, name="global_step", trainable=False)

            # Load the data
            imgs_train, msks_train, imgs_test, msks_test = load_all_data()
            train_length = imgs_train.shape[0]  # Number of training examples
            test_length = imgs_test.shape[0]  # Number of test examples
            """
			BEGIN: Define our model
			"""

            imgs = tf.placeholder(tf.float32,
                                  shape=(None, imgs_train.shape[1],
                                         imgs_train.shape[2],
                                         imgs_train.shape[3]))

            msks = tf.placeholder(tf.float32,
                                  shape=(None, msks_train.shape[1],
                                         msks_train.shape[2],
                                         msks_train.shape[3]))

            preds = define_model(imgs, FLAGS.use_upsampling,
                                 settings_dist.OUT_CHANNEL_NO)

            print('Model defined')

            loss_value = dice_coef_loss(msks, preds)
            dice_value = dice_coef(msks, preds)

            sensitivity_value = sensitivity(msks, preds)
            specificity_value = specificity(msks, preds)

            test_loss_value = tf.placeholder(tf.float32, ())
            test_dice_value = tf.placeholder(tf.float32, ())

            test_sensitivity_value = tf.placeholder(tf.float32, ())
            test_specificity_value = tf.placeholder(tf.float32, ())
            """
			END: Define our model
			"""

            # Decay learning rate from initial_learn_rate to initial_learn_rate*fraction in decay_steps global steps
            if FLAGS.const_learningrate:
                learning_rate = tf.convert_to_tensor(FLAGS.learning_rate,
                                                     dtype=tf.float32)
            else:
                learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                                           global_step,
                                                           FLAGS.decay_steps,
                                                           FLAGS.lr_fraction,
                                                           staircase=False)

            # Compensate learning rate for asynchronous distributed
            # THEORY: We need to cut the learning rate by at least the number
            # of workers since there are likely to be that many times increased
            # parameter updates.
            # if not is_sync:
            # 	learning_rate /= len(worker_hosts)
            # 	optimizer = tf.train.GradientDescentOptimizer(learning_rate)
            # 	#optimizer = tf.train.AdagradOptimizer(learning_rate)
            # else:
            # 	optimizer = tf.train.AdamOptimizer(learning_rate)

            optimizer = tf.train.AdamOptimizer(learning_rate)

            grads_and_vars = optimizer.compute_gradients(loss_value)
            if is_sync:

                rep_op = tf.train.SyncReplicasOptimizer(
                    optimizer,
                    replicas_to_aggregate=len(worker_hosts),
                    total_num_replicas=len(worker_hosts),
                    use_locking=True)

                train_op = rep_op.apply_gradients(grads_and_vars,
                                                  global_step=global_step)

                init_token_op = rep_op.get_init_tokens_op()

                chief_queue_runner = rep_op.get_chief_queue_runner()

            else:

                train_op = optimizer.apply_gradients(grads_and_vars,
                                                     global_step=global_step)

            init_op = tf.global_variables_initializer()

            saver = tf.train.Saver()

            # These are the values we wish to print to TensorBoard

            tf.summary.scalar("loss", loss_value)
            tf.summary.histogram("loss", loss_value)
            tf.summary.scalar("dice", dice_value)
            tf.summary.histogram("dice", dice_value)

            tf.summary.scalar("sensitivity", sensitivity_value)
            tf.summary.histogram("sensitivity", sensitivity_value)
            tf.summary.scalar("specificity", specificity_value)
            tf.summary.histogram("specificity", specificity_value)

            tf.summary.image("predictions",
                             preds,
                             max_outputs=settings_dist.TENSORBOARD_IMAGES)
            tf.summary.image("ground_truth",
                             msks,
                             max_outputs=settings_dist.TENSORBOARD_IMAGES)
            tf.summary.image("images",
                             imgs,
                             max_outputs=settings_dist.TENSORBOARD_IMAGES)

            print("Loading epoch")
            epoch = get_epoch(batch_size, imgs_train, msks_train)
            num_batches = len(epoch)
            print("Loaded")

            # Print the percent steps complete to TensorBoard
            #   so that we know how much of the training remains.
            num_steps_tf = tf.constant(num_batches * FLAGS.epochs, tf.float32)
            percent_done_value = tf.constant(100.0) * tf.to_float(
                global_step) / num_steps_tf
            tf.summary.scalar("percent_complete", percent_done_value)

        # Need to remove the checkpoint directory before each new run
        # import shutil
        # shutil.rmtree(CHECKPOINT_DIRECTORY, ignore_errors=True)

        # Send a signal to the ps when done by simply updating a queue in the shared graph
        enq_ops = []
        for q in create_done_queues():
            qop = q.enqueue(1)
            enq_ops.append(qop)

        # Only the chief does the summary
        if is_chief:
            summary_op = tf.summary.merge_all()
        else:
            summary_op = None

        # Summaries for the test data. These summary ops are not part of
        # the merge_all op, so they can be run separately; the test
        # placeholders themselves were defined with the model above.

        test_loss_summary = tf.summary.scalar("loss_test", test_loss_value)
        test_dice_summary = tf.summary.scalar("dice_test", test_dice_value)

        test_sens_summary = tf.summary.scalar("sensitivity_test",
                                              test_sensitivity_value)
        test_spec_summary = tf.summary.scalar("specificity_test",
                                              test_specificity_value)

        # TODO:  Theoretically I can pass the summary_op into
        # the Supervisor and have it handle the TensorBoard
        # log entries. However, doing so seems to hang the code.
        # For now, I just handle the summary calls explicitly.
        # import time
        # logDirName = CHECKPOINT_DIRECTORY + "/run" + \
        # 			time.strftime("_%Y%m%d_%H%M%S")

        if FLAGS.use_upsampling:
            method_up = "upsample2D"
        else:
            method_up = "conv2DTranspose"

        logDirName = CHECKPOINT_DIRECTORY + "/unet," + \
            "lr={},{},intra={},inter={}".format(
                FLAGS.learning_rate, method_up,
                num_intra_op_threads, num_inter_op_threads)

        sv = tf.train.Supervisor(
            is_chief=is_chief,
            logdir=logDirName,
            init_op=init_op,
            summary_op=None,
            saver=saver,
            global_step=global_step,
            save_model_secs=60  # Save the model (with weights) every 60 seconds
        )

        # TODO:
        # I'd like to use managed_session for this as it is more abstract
        # and probably less sensitive to changes from the TF team. However,
        # I am finding that the chief worker hangs on exit if I use managed_session.
        with sv.prepare_or_wait_for_session(server.target,
                                            config=config) as sess:
            #with sv.managed_session(server.target) as sess:

            if sv.is_chief and is_sync:
                sv.start_queue_runners(sess, [chief_queue_runner])
                sess.run(init_token_op)

            step = 0

            progressbar = trange(num_batches * FLAGS.epochs)
            last_step = 0

            # Start TensorBoard on the chief worker
            if sv.is_chief:
                cmd = 'tensorboard --logdir={}'.format(CHECKPOINT_DIRECTORY)
                tb_process = subprocess.Popen(cmd,
                                              stdout=subprocess.PIPE,
                                              shell=True,
                                              preexec_fn=os.setsid)

            while (not sv.should_stop()) and (step <
                                              (num_batches * FLAGS.epochs)):

                batch_idx = step % num_batches  # Which batch within the epoch?

                data = epoch[batch_idx, 0]
                labels = epoch[batch_idx, 1]

                # For n workers, break the batch into n sections and send
                # each worker a different section of the batch.
                data_range = batch_size // len(worker_hosts)
                start = data_range * task_index
                end = start + data_range

                feed_dict = {imgs: data[start:end], msks: labels[start:end]}

                _, loss_v, dice_v, step = sess.run(
                    [train_op, loss_value, dice_value, global_step],
                    feed_dict=feed_dict)

                # Print summary only on chief
                if sv.is_chief:

                    summary = sess.run(summary_op, feed_dict=feed_dict)
                    sv.summary_computed(sess, summary)  # Update the summary

                    # Calculate metric on test dataset every epoch
                    if (batch_idx == 0) and (step > num_batches):

                        dice_v_test = 0.0
                        loss_v_test = 0.0
                        sens_v_test = 0.0
                        spec_v_test = 0.0

                        for idx in tqdm(
                                range(0, imgs_test.shape[0] - batch_size,
                                      batch_size),
                                desc="Calculating metrics on test dataset",
                                leave=False):
                            x_test = imgs_test[idx:(idx + batch_size)]
                            y_test = msks_test[idx:(idx + batch_size)]

                            feed_dict = {imgs: x_test, msks: y_test}

                            l_v, d_v, st_v, sp_v = sess.run(
                                [
                                    loss_value, dice_value, sensitivity_value,
                                    specificity_value
                                ],
                                feed_dict=feed_dict)

                            dice_v_test += d_v / (test_length // batch_size)
                            loss_v_test += l_v / (test_length // batch_size)
                            sens_v_test += st_v / (test_length // batch_size)
                            spec_v_test += sp_v / (test_length // batch_size)


                        print("\nEpoch {} of {}: TEST DATASET\nloss = {:.4f}\nDice = {:.4f}\n" \
                         "Sensitivity = {:.4f}\nSpecificity = {:.4f}" \
                         .format((step // num_batches), FLAGS.epochs,
                          loss_v_test, dice_v_test, sens_v_test, spec_v_test))

                        # Add our test summary metrics to TensorBoard
                        sv.summary_computed(
                            sess,
                            sess.run(test_loss_summary,
                                     feed_dict={test_loss_value: loss_v_test}))
                        sv.summary_computed(
                            sess,
                            sess.run(test_dice_summary,
                                     feed_dict={test_dice_value: dice_v_test}))
                        sv.summary_computed(
                            sess,
                            sess.run(test_sens_summary,
                                     feed_dict={
                                         test_sensitivity_value: sens_v_test
                                     }))
                        sv.summary_computed(
                            sess,
                            sess.run(test_spec_summary,
                                     feed_dict={
                                         test_specificity_value: spec_v_test
                                     }))

                        saver.save(
                            sess,
                            CHECKPOINT_DIRECTORY + "/last_good_model.cpkt")

                # Shuffle every epoch
                if (batch_idx == 0) and (step > num_batches):

                    print("Shuffling epoch")
                    epoch = get_epoch(batch_size, imgs_train, msks_train)

                # Print the loss and dice metric in the progress bar.
                progressbar.set_description(
                    "(loss={:.4f}, dice={:.4f})".format(loss_v, dice_v))
                progressbar.update(step - last_step)
                last_step = step

            # Perform the final test set metric
            if sv.is_chief:

                dice_v_test = 0.0
                loss_v_test = 0.0

                for idx in tqdm(range(0, imgs_test.shape[0] - batch_size,
                                      batch_size),
                                desc="Calculating metrics on test dataset",
                                leave=False):
                    x_test = imgs_test[idx:(idx + batch_size)]
                    y_test = msks_test[idx:(idx + batch_size)]

                    feed_dict = {imgs: x_test, msks: y_test}

                    l_v, d_v = sess.run([loss_value, dice_value],
                                        feed_dict=feed_dict)

                    dice_v_test += d_v / (test_length // batch_size)
                    loss_v_test += l_v / (test_length // batch_size)


                print("\nEpoch {} of {}: Test loss = {:.4f}, Test Dice = {:.4f}" \
                 .format((step // num_batches), FLAGS.epochs,
                  loss_v_test, dice_v_test))

                sv.summary_computed(
                    sess,
                    sess.run(test_loss_summary,
                             feed_dict={test_loss_value: loss_v_test}))
                sv.summary_computed(
                    sess,
                    sess.run(test_dice_summary,
                             feed_dict={test_dice_value: dice_v_test}))

                saver.save(sess,
                           CHECKPOINT_DIRECTORY + "/last_good_model.cpkt")

            if sv.is_chief:
                export_model(
                    sess, imgs, preds
                )  # Save the final model as protobuf for TensorFlow Serving

                os.killpg(os.getpgid(tb_process.pid),
                          signal.SIGTERM)  # Stop TensorBoard process

            # Send a signal to the ps when done by simply updating a queue in the shared graph
            for op in enq_ops:
                sess.run(
                    op
                )  # Send the "work completed" signal to the parameter server

        print("\n\nFinished work on this node.")
        import time
        time.sleep(3)  # Sleep for 3 seconds then exit

        sv.request_stop()
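
# A sketch of the done-queue helpers that main() relies on (reconstructed
# assumptions; the originals are defined elsewhere in this script, and
# `ps_hosts`/`worker_hosts` are module-level lists). One shared FIFO queue
# lives on each parameter server so that every worker can enqueue a token
# when it finishes, and each ps dequeues one token per worker before quitting.
def create_done_queue(i):
    with tf.device("/job:ps/task:{}".format(i)):
        return tf.FIFOQueue(len(worker_hosts), tf.int32,
                            shared_name="done_queue{}".format(i))

def create_done_queues():
    return [create_done_queue(i) for i in range(len(ps_hosts))]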
def get_activations(data, model, config, sample_size=None):
    """
    This function goes over the data, one by one (batch size = 1), getting the max-pooled ngrams/activations,
    slot activations, and organizes them into a dict object for the "model interpretation" functions for the purpose
    of capturing the semantic meaning of each filter and calculating thresholds.
    """

    model.eval()
    epoch_x, epoch_y, lengths_x = get_epoch(data["train_x"], data["train_y"], 1,
                                            is_train=False, num_examples=sample_size)

    interpretation_info = {
        "slot_activations": {},
        "chosen_ngrams_by_filter": {},
        "predicted_class": {}
    }

    for ngram_size in config["ngram_sizes"]:
        for filter_ix in range(config["num_filters"]):
            fname = "w" + str(ngram_size) + ".f" + str(filter_ix)
            interpretation_info["slot_activations"][fname] = []
            interpretation_info["chosen_ngrams_by_filter"][fname] = []
            interpretation_info["predicted_class"][fname] = []

    for batch_x, batch_y, length_x in zip(epoch_x, epoch_y, lengths_x):
        batch_x = torch.LongTensor(batch_x)
        batch_y = torch.LongTensor(batch_y)
        batch_lengths = torch.LongTensor(length_x)

        if config["cuda"]:
            batch_x, batch_y, batch_lengths = batch_x.cuda(), batch_y.cuda(), batch_lengths.cuda()

        out = model(batch_x)

        # activations_filters = out['activations_filters']
        ngram_indices = out['ngram_indices']
        # activations_filters_pooled = out['activations_filters_pooled']
        logits = out['logits']

        indexed_seq = [int(x) for x in batch_x[0]]
        str_seq = [data["idx_to_word"][w] for w in indexed_seq]

        prediction = int(logits.squeeze().max(0)[1].item())

        ngram_indices = [[int(x) for x in indices.squeeze()] for indices in ngram_indices]
        filters = model.get_filters()

        max_w_size = max(config["ngram_sizes"])
        for w_size_ix, w_size in enumerate(config["ngram_sizes"]):
            seq = ['@@PAD@@'] * (max_w_size - 1) + str_seq + ['@@PAD@@'] * (max_w_size - 1)
            indexed_seq_padded = [data['word_to_idx']['@@PAD@@']] * (max_w_size - 1) + indexed_seq \
                                 + [data['word_to_idx']['@@PAD@@']] * (max_w_size - 1)

            for jx, ngram_ix in enumerate(ngram_indices[w_size_ix]):
                indexed_ngram = indexed_seq_padded[ngram_ix:ngram_ix + w_size]

                f, b = filters[w_size_ix]
                windows = [f[jx][k:k + config["embedding_dim"]] for k in range(0, f.size()[1], config["embedding_dim"])]
                # bias = b[jx]
                E = model.get_embeddings()
                ngram_embeddings = [E[k] for k in indexed_ngram]

                assert len(windows) == len(ngram_embeddings)
                slot_acts = [float(torch.dot(a, b)) for a, b in zip(windows, ngram_embeddings)]
                slot_acts = {str(vx): v for vx, v in enumerate(slot_acts)}

                # Uncomment to verify that this code is correct, i.e. that the
                # sum of slot activations plus the filter bias (through the
                # ReLU) reproduces the pooled activation from the model:
                # assert math.isclose(
                #     max(sum(slot_acts.values()) + float(b[jx].item()), 0),
                #     float(out['activations_filters_pooled'][w_size_ix].squeeze()[jx]),
                #     rel_tol=1e-05, abs_tol=1e-05)

                fname = "w" + str(w_size) + ".f" + str(jx)

                interpretation_info["slot_activations"][fname].append(slot_acts)
                interpretation_info["chosen_ngrams_by_filter"][fname].append(seq[ngram_ix:ngram_ix + w_size])
                interpretation_info["predicted_class"][fname].append(prediction)

        del batch_x, batch_y, batch_lengths, out

    for fname in interpretation_info["slot_activations"]:
        interpretation_info["slot_activations"][fname] \
            = np.array([list(vals_dict.values()) for vals_dict in interpretation_info["slot_activations"][fname]])
        interpretation_info["predicted_class"][fname] = np.array(interpretation_info["predicted_class"][fname])

    return interpretation_info
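
import math

# A standalone version of the commented sanity check above (an editor's
# sketch, not in the original): for a ReLU conv filter, the sum of one
# filter's slot activations plus its bias, clamped at zero, should
# reproduce the max-pooled activation reported by the model.
def check_slot_decomposition(slot_acts, bias, pooled, tol=1e-5):
    total = max(sum(slot_acts) + float(bias), 0.0)
    return math.isclose(total, float(pooled), rel_tol=tol, abs_tol=tol)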
def interpret_predictions(data, model, config):
    """
    Get a list of prediction interpretations. Each instance in the list contains:
    * The input sentence
    * The gold label
    * The predicted label
    * For each filter:
      - The chosen ngram (by max-pooling)
      - The ngram's activation at the max-pooling layer (AFTER adding the filter bias + AFTER a ReLU layer)
      - The slot activation vector for the ngram
    """
    model.eval()
    epoch_x, epoch_y, lengths_x = get_epoch(data["pred_x"], data["pred_y"], 1, is_train=False)

    prediction_info = []

    for batch_x, batch_y, length_x in zip(epoch_x, epoch_y, lengths_x):
        batch_x = torch.LongTensor(batch_x)
        batch_y = torch.LongTensor(batch_y)
        batch_lengths = torch.LongTensor(length_x)

        if config["cuda"]:
            batch_x, batch_y, batch_lengths = batch_x.cuda(), batch_y.cuda(), batch_lengths.cuda()

        out = model(batch_x)

        pinfo = {}

        params = config

        # activations_filters = out['activations_filters']  # per-position features
        ngram_indices = out['ngram_indices']
        pooled = out['activations_filters_pooled']
        logits = out['logits']

        indexed_seq = [int(x) for x in batch_x[0]]
        str_seq = [data["idx_to_word"][w] for w in indexed_seq]

        prediction = int(logits.squeeze().max(0)[1].item())
        prediction_str = config['class_to_str'][str(prediction)]
        gold = int(batch_y)
        gold_str = config['class_to_str'][str(gold)]

        pinfo["sentence"] = str_seq
        pinfo["gold"] = gold
        pinfo["gold_str"] = gold_str
        pinfo["prediction"] = prediction
        pinfo["prediction_str"] = prediction_str

        ngram_indices = [[int(x) for x in indices.squeeze()] for indices in ngram_indices]
        pooled_vals = [[float(x) for x in p.squeeze()] for p in pooled]
        filters = model.get_filters()

        max_ngram_len = max(params["ngram_sizes"])
        for ngram_len_idx, ngram_len in enumerate(params["ngram_sizes"]):
            seq = ['@@PAD@@'] * (max_ngram_len - 1) + str_seq + ['@@PAD@@'] * (max_ngram_len - 1)
            indexed_seq_padded = [data['word_to_idx']['@@PAD@@']] * (max_ngram_len - 1) + indexed_seq \
                                 + [data['word_to_idx']['@@PAD@@']] * (max_ngram_len - 1)

            for jx, ngram_ix in enumerate(ngram_indices[ngram_len_idx]):
                indexed_ngram = indexed_seq_padded[ngram_ix:ngram_ix + ngram_len]

                f, b = filters[ngram_len_idx]
                windows = [f[jx][k:k + params["embedding_dim"]] for k in range(0, f.size()[1], params["embedding_dim"])]
                # bias = b[jx]
                E = model.get_embeddings()
                ngram_embeddings = [E[k] for k in indexed_ngram]

                word_values = [float(torch.dot(a, b)) for a, b in zip(windows, ngram_embeddings)]

                fname = "w" + str(ngram_len) + ".f" + str(jx)

                if fname not in pinfo:
                    pinfo[fname] = {}
                pinfo[fname]["chosen_ngram_span"] = [ngram_ix, ngram_ix + ngram_len]
                pinfo[fname]["chosen_ngram"] = seq[ngram_ix:ngram_ix + ngram_len]
                pinfo[fname]["slot_activations"] = word_values
                pinfo[fname]["activation"] = pooled_vals[ngram_len_idx][jx]

        prediction_info.append(pinfo)

        del batch_x, batch_y, batch_lengths, out

    return prediction_info
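
# A small rendering sketch for one entry of `prediction_info` (a hypothetical
# helper; "w3.f0" is an example key, and any name of the form
# "w<ngram_size>.f<filter_ix>" produced above works).
def print_interpretation(pinfo, fname="w3.f0"):
    start, end = pinfo[fname]["chosen_ngram_span"]
    print(" ".join(pinfo["sentence"]))
    print("gold={}, prediction={}".format(pinfo["gold_str"],
                                          pinfo["prediction_str"]))
    print("{}: ngram [{}:{}] = '{}' (activation {:.4f})".format(
        fname, start, end, " ".join(pinfo[fname]["chosen_ngram"]),
        pinfo[fname]["activation"]))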