def eval_epoch(model, data, config):
    """Evaluate ``model`` on the validation split.

    Args:
        model: Module called as ``model(batch_x)``; the result must expose a
            ``'logits'`` entry that ``nn.CrossEntropyLoss`` accepts.
        data: Mapping providing ``"valid_x"`` and ``"valid_y"``.
        config: Mapping providing ``"batch_size"`` and the ``"cuda"`` flag.

    Returns:
        ``(loss, accuracy)`` where ``loss`` is the sum of the per-batch
        criterion values divided by ``len(data["valid_y"])`` and ``accuracy``
        is the percentage of correctly classified validation examples.

    Raises:
        ZeroDivisionError: If ``data["valid_y"]`` is empty.
    """
    model.eval()
    epoch_x, epoch_y, lengths_x = get_epoch(
        data["valid_x"], data["valid_y"], config["batch_size"], is_train=False)
    criterion = nn.CrossEntropyLoss()
    use_cuda = config["cuda"]

    epoch_loss = 0
    corrects = 0
    # Pure inference: disabling autograd avoids building (and holding on to)
    # a graph for every validation batch.
    with torch.no_grad():
        # The lengths are not consumed by the model call, but they stay in the
        # zip so the number of iterations matches get_epoch's three outputs.
        for batch_x, batch_y, _length_x in zip(epoch_x, epoch_y, lengths_x):
            batch_x = torch.LongTensor(batch_x)
            batch_y = torch.LongTensor(batch_y)
            if use_cuda:
                batch_x, batch_y = batch_x.cuda(), batch_y.cuda()

            pred = model(batch_x)['logits']
            epoch_loss += float(criterion(pred, batch_y))

            predicted = torch.max(pred, 1)[1].view(batch_y.size())
            corrects += int((predicted == batch_y).sum())

    # NOTE(review): the summed per-batch losses are normalised by the number
    # of examples, not the number of batches. Kept as-is so reported values
    # stay comparable with earlier runs -- confirm this is intended.
    num_examples = len(data["valid_y"])
    return epoch_loss / num_examples, corrects / num_examples * 100
def eval_epoch_with_thresholds(model, data, config, thresholds):
    """Evaluate the thresholded model on the validation set.

    Args:
        model: Module called as ``model(batch_x, thresh)``; the result must
            expose a ``'logits'`` entry.
        data: Mapping providing ``"valid_x"`` and ``"valid_y"``.
        config: Mapping providing ``"batch_size"`` and the ``"cuda"`` flag.
        thresholds: Mapping whose ``"thresholds"`` entry is an iterable of
            numeric rows; each row becomes one ``FloatTensor`` handed to the
            model.

    Returns:
        ``(loss, accuracy)`` where ``loss`` is the sum of the per-batch
        criterion values divided by ``len(data["valid_y"])`` and ``accuracy``
        is the percentage of correctly classified validation examples.

    Raises:
        ZeroDivisionError: If ``data["valid_y"]`` is empty.
    """
    model.eval()
    epoch_x, epoch_y, lengths_x = get_epoch(
        data["valid_x"], data["valid_y"], config["batch_size"], is_train=False)
    criterion = nn.CrossEntropyLoss()
    use_cuda = config["cuda"]

    thresh = [torch.FloatTensor(thresh_row)
              for thresh_row in thresholds["thresholds"]]
    if use_cuda:
        thresh = [t.cuda() for t in thresh]

    epoch_loss = 0
    corrects = 0
    # Pure inference: no autograd graph is needed for validation batches.
    with torch.no_grad():
        # The lengths are not consumed by the model call, but they stay in the
        # zip so the number of iterations matches get_epoch's three outputs.
        for batch_x, batch_y, _length_x in zip(epoch_x, epoch_y, lengths_x):
            batch_x = torch.LongTensor(batch_x)
            batch_y = torch.LongTensor(batch_y)
            if use_cuda:
                batch_x, batch_y = batch_x.cuda(), batch_y.cuda()

            pred = model(batch_x, thresh)['logits']
            epoch_loss += float(criterion(pred, batch_y))

            predicted = torch.max(pred, 1)[1].view(batch_y.size())
            corrects += int((predicted == batch_y).sum())

    # NOTE(review): the summed per-batch losses are normalised by the number
    # of examples, not the number of batches. Kept as-is so reported values
    # stay comparable with earlier runs -- confirm this is intended.
    num_examples = len(data["valid_y"])
    return epoch_loss / num_examples, corrects / num_examples * 100
def run_adversary_attack(model, data, config, advers_attack, verbose=False):
    """Append an adversarial token sequence to each validation example.

    Every validation example is classified on its own (batch size 1), then
    classified again with ``advers_attack[outcome]`` appended, where
    ``outcome`` is ``"TP"``, ``"TN"``, ``"FP"`` or ``"FN"`` according to the
    original prediction and the gold label (class 1 is treated as positive).

    Args:
        model: Module called as ``model(batch_x)``; the result must expose a
            ``'logits'`` entry.
        data: Mapping providing ``"valid_x"`` and ``"valid_y"``; with
            ``verbose=True`` it must also provide ``"idx_to_word"``.
        config: Mapping providing the ``"cuda"`` flag.
        advers_attack: Mapping from outcome name to the token-id list that is
            concatenated onto each example with that outcome.
        verbose: When true, print the original and attacked sentence together
            with the gold label and both predictions.

    Returns:
        ``(results, results_extended)``. ``results[outcome]`` is
        ``[changed, unchanged]``: how many examples of that outcome had their
        prediction flipped / kept by the attack. ``results_extended`` holds
        one ``[gold, prediction, adversarial_prediction, length]`` row per
        example; the three class entries are NumPy scalars on every device.

    Raises:
        KeyError: If a prediction/label pair is not binary (the outcome is
            then ``""``) and ``advers_attack`` has no entry for it.
    """
    model.eval()
    # One example per batch: the bookkeeping below only inspects element 0.
    epoch_x, epoch_y, lengths_x = get_epoch(
        data["valid_x"], data["valid_y"], 1, is_train=False)
    use_cuda = config["cuda"]

    # Per outcome: [changed by the attack, unchanged by the attack].
    results = {
        "TP": [0, 0],
        "TN": [0, 0],
        "FP": [0, 0],
        "FN": [0, 0],
    }
    results_extended = []
    # (predicted class, gold class) -> outcome name.
    outcome_names = {(1, 1): "TP", (0, 0): "TN", (1, 0): "FP", (0, 1): "FN"}

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for tokens, labels, lengths in zip(epoch_x, epoch_y, lengths_x):
            batch_x = torch.LongTensor(tokens)
            batch_y = torch.LongTensor(labels)
            if use_cuda:
                batch_x, batch_y = batch_x.cuda(), batch_y.cuda()

            pred = model(batch_x)['logits']
            # Always move to host memory so CPU and CUDA runs report the same
            # value types (the conversion used to happen only under CUDA).
            pred_class = torch.max(pred, 1)[1].view(batch_y.size()).cpu().numpy()
            gold = batch_y.cpu().numpy()

            outcome = outcome_names.get(
                (int(pred_class[0]), int(gold[0])), "")
            trigger = advers_attack[outcome]

            tokens_advers = [a + trigger for a in tokens]
            batch_x_advers = torch.LongTensor(tokens_advers)
            if use_cuda:
                batch_x_advers = batch_x_advers.cuda()

            pred_advers = model(batch_x_advers)['logits']
            pred_class_advers = torch.max(pred_advers, 1)[1].view(
                batch_y.size()).cpu().numpy()

            if pred_class[0] == pred_class_advers[0]:
                results[outcome][1] += 1
            else:
                results[outcome][0] += 1

            results_extended.append([
                gold[0], pred_class[0], pred_class_advers[0], lengths[0]
            ])

            if verbose:
                print(
                    "Original sentence : ",
                    " ".join([data["idx_to_word"][a] for a in tokens[0]]))
                print(
                    "Adversarial sentence : ",
                    " ".join([
                        data["idx_to_word"][a] for a in tokens_advers[0]
                    ]))
                print("Truth {}, Pred {}, Adversarial {}".format(
                    gold[0], pred_class[0], pred_class_advers[0]))

    return results, results_extended
def _average_test_metrics(sess, fetches, imgs, msks, imgs_test, msks_test,
                          batch_size):
    """Return the mean of each tensor in ``fetches`` over the full test batches.

    Every complete batch of ``batch_size`` test samples is evaluated; a
    trailing partial batch is ignored. When there is no complete batch the
    result is ``0.0`` for every fetch.
    """
    num_test_batches = imgs_test.shape[0] // batch_size
    totals = [0.0] * len(fetches)

    # Iterating over exactly num_test_batches batches keeps the loop count
    # equal to the divisor even when the test set size is a multiple of
    # batch_size (the previous range() stopped one batch early in that case).
    for batch_no in tqdm(range(num_test_batches),
                         desc="Calculating metrics on test dataset",
                         leave=False):
        idx = batch_no * batch_size
        feed_dict = {imgs: imgs_test[idx:(idx + batch_size)],
                     msks: msks_test[idx:(idx + batch_size)]}
        values = sess.run(fetches, feed_dict=feed_dict)
        for pos, value in enumerate(values):
            totals[pos] += value / num_test_batches

    return totals


def main(_):
    """Run this node's role (parameter server or worker) of the training job.

    The role comes from the module-level ``job_name`` / ``task_index``.
    A parameter server blocks until every worker has enqueued its "done"
    token. A worker builds the model graph, trains for ``FLAGS.epochs``
    epochs, and -- when it is the chief (task 0) -- also writes TensorBoard
    summaries, evaluates on the test set, saves checkpoints and exports the
    final model.

    Args:
        _: Unused positional argument (presumably the argv list passed by the
            application runner -- TODO confirm).
    """
    config = tf.ConfigProto(inter_op_parallelism_threads=num_inter_op_threads,
                            intra_op_parallelism_threads=num_intra_op_threads)

    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()  # For Tensorflow trace

    cluster = tf.train.ClusterSpec({"ps": ps_list, "worker": worker_list})
    server = tf.train.Server(cluster, job_name=job_name, task_index=task_index)

    is_sync = (FLAGS.is_sync == 1)  # Synchronous or asynchronous updates
    is_chief = (task_index == 0)  # Am I the chief node (always task 0)

    greedy = tf.contrib.training.GreedyLoadBalancingStrategy(
        num_tasks=len(ps_hosts),
        load_fn=tf.contrib.training.byte_size_load_fn)

    if job_name == "ps":

        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:ps/task:{}".format(task_index),
                    ps_tasks=len(ps_hosts),
                    ps_strategy=greedy,
                    cluster=cluster)):

            sess = tf.Session(server.target, config=config)
            queue = create_done_queue(task_index)

            print("*" * 30)
            print("\nParameter server #{} on {}.\n\n"
                  "Waiting on workers to finish.\n\nPress CTRL-\\ to terminate early.\n"
                  .format(task_index, ps_hosts[task_index]))
            print("*" * 30)

            # wait until all workers are done
            for i in range(len(worker_hosts)):
                sess.run(queue.dequeue())
                print("Worker #{} reports job finished.".format(i))

            print("Parameter server #{} is quitting".format(task_index))
            print("Training complete.")

    elif job_name == "worker":

        if is_chief:
            print("I am chief worker {} with task #{}".format(
                worker_hosts[task_index], task_index))
        else:
            print("I am worker {} with task #{}".format(
                worker_hosts[task_index], task_index))

        if len(ps_list) > 0:
            setDevice = tf.train.replica_device_setter(
                worker_device="/job:worker/task:{}".format(task_index),
                ps_tasks=len(ps_hosts),
                ps_strategy=greedy,
                cluster=cluster)
        else:
            setDevice = "/cpu:0"  # No parameter server so put variables on chief worker

        with tf.device(setDevice):

            global_step = tf.Variable(0, name="global_step", trainable=False)

            # Load the data
            imgs_train, msks_train, imgs_test, msks_test = load_all_data()
            train_length = imgs_train.shape[0]  # Number of train datasets
            test_length = imgs_test.shape[0]  # Number of test datasets

            """
            BEGIN: Define our model
            """
            imgs = tf.placeholder(tf.float32,
                                  shape=(None, msks_train.shape[1],
                                         msks_train.shape[2],
                                         msks_train.shape[3]))
            msks = tf.placeholder(tf.float32,
                                  shape=(None, msks_train.shape[1],
                                         msks_train.shape[2],
                                         msks_train.shape[3]))

            preds = define_model(imgs, FLAGS.use_upsampling,
                                 settings_dist.OUT_CHANNEL_NO)

            print('Model defined')

            loss_value = dice_coef_loss(msks, preds)
            dice_value = dice_coef(msks, preds)
            sensitivity_value = sensitivity(msks, preds)
            specificity_value = specificity(msks, preds)

            # Scalar placeholders used to feed the averaged test metrics into
            # their TensorBoard summaries further down.
            test_loss_value = tf.placeholder(tf.float32, ())
            test_dice_value = tf.placeholder(tf.float32, ())
            test_sensitivity_value = tf.placeholder(tf.float32, ())
            test_specificity_value = tf.placeholder(tf.float32, ())
            """
            END: Define our model
            """

            # Decay learning rate from initial_learn_rate to
            # initial_learn_rate*fraction in decay_steps global steps
            if FLAGS.const_learningrate:
                learning_rate = tf.convert_to_tensor(FLAGS.learning_rate,
                                                     dtype=tf.float32)
            else:
                learning_rate = tf.train.exponential_decay(
                    FLAGS.learning_rate, global_step, FLAGS.decay_steps,
                    FLAGS.lr_fraction, staircase=False)

            # Compensate learning rate for asynchronous distributed
            # THEORY: We need to cut the learning rate by at least the number
            # of workers since there are likely to be that many times increased
            # parameter updates.
            # if not is_sync:
            #     learning_rate /= len(worker_hosts)
            #     optimizer = tf.train.GradientDescentOptimizer(learning_rate)
            #     #optimizer = tf.train.AdagradOptimizer(learning_rate)
            # else:
            #     optimizer = tf.train.AdamOptimizer(learning_rate)
            optimizer = tf.train.AdamOptimizer(learning_rate)

            grads_and_vars = optimizer.compute_gradients(loss_value)
            if is_sync:

                rep_op = tf.train.SyncReplicasOptimizer(
                    optimizer,
                    replicas_to_aggregate=len(worker_hosts),
                    total_num_replicas=len(worker_hosts),
                    use_locking=True)

                train_op = rep_op.apply_gradients(grads_and_vars,
                                                  global_step=global_step)

                init_token_op = rep_op.get_init_tokens_op()
                chief_queue_runner = rep_op.get_chief_queue_runner()

            else:

                train_op = optimizer.apply_gradients(grads_and_vars,
                                                     global_step=global_step)

            init_op = tf.global_variables_initializer()

            saver = tf.train.Saver()

            # These are the values we wish to print to TensorBoard
            tf.summary.scalar("loss", loss_value)
            tf.summary.histogram("loss", loss_value)
            tf.summary.scalar("dice", dice_value)
            tf.summary.histogram("dice", dice_value)

            tf.summary.scalar("sensitivity", sensitivity_value)
            tf.summary.histogram("sensitivity", sensitivity_value)
            tf.summary.scalar("specificity", specificity_value)
            tf.summary.histogram("specificity", specificity_value)

            tf.summary.image("predictions", preds,
                             max_outputs=settings_dist.TENSORBOARD_IMAGES)
            tf.summary.image("ground_truth", msks,
                             max_outputs=settings_dist.TENSORBOARD_IMAGES)
            tf.summary.image("images", imgs,
                             max_outputs=settings_dist.TENSORBOARD_IMAGES)

            print("Loading epoch")
            epoch = get_epoch(batch_size, imgs_train, msks_train)
            num_batches = len(epoch)
            print("Loaded")

            # Print the percent steps complete to TensorBoard
            # so that we know how much of the training remains.
            num_steps_tf = tf.constant(num_batches * FLAGS.epochs, tf.float32)
            percent_done_value = tf.constant(100.0) * tf.to_float(
                global_step) / num_steps_tf
            tf.summary.scalar("percent_complete", percent_done_value)

            # Need to remove the checkpoint directory before each new run
            # import shutil
            # shutil.rmtree(CHECKPOINT_DIRECTORY, ignore_errors=True)

            # Send a signal to the ps when done by simply updating a queue in
            # the shared graph
            enq_ops = []
            for q in create_done_queues():
                qop = q.enqueue(1)
                enq_ops.append(qop)

            # Only the chief does the summary
            if is_chief:
                summary_op = tf.summary.merge_all()
            else:
                summary_op = None

            # Add summaries for test data
            # These summary ops are created after merge_all(), so they are not
            # part of the merged op. This way we can call these separately.
            test_loss_summary = tf.summary.scalar("loss_test",
                                                  test_loss_value)
            test_dice_summary = tf.summary.scalar("dice_test",
                                                  test_dice_value)
            test_sens_summary = tf.summary.scalar("sensitivity_test",
                                                  test_sensitivity_value)
            test_spec_summary = tf.summary.scalar("specificity_test",
                                                  test_specificity_value)

        # TODO: Theoretically I can pass the summary_op into
        # the Supervisor and have it handle the TensorBoard
        # log entries. However, doing so seems to hang the code.
        # For now, I just handle the summary calls explicitly.
        # import time
        # logDirName = CHECKPOINT_DIRECTORY + "/run" + \
        #     time.strftime("_%Y%m%d_%H%M%S")

        if FLAGS.use_upsampling:
            method_up = "upsample2D"
        else:
            method_up = "conv2DTranspose"

        logDirName = CHECKPOINT_DIRECTORY + "/unet," + \
            "lr={},{},intra={},inter={}".format(FLAGS.learning_rate,
                                                 method_up,
                                                 num_intra_op_threads,
                                                 num_inter_op_threads)

        sv = tf.train.Supervisor(
            is_chief=is_chief,
            logdir=logDirName,
            init_op=init_op,
            summary_op=None,
            saver=saver,
            global_step=global_step,
            save_model_secs=60  # Save the model (with weights) every 60 seconds
        )

        # TODO:
        # I'd like to use managed_session for this as it is more abstract
        # and probably less sensitive to changes from the TF team. However,
        # I am finding that the chief worker hangs on exit if I use
        # managed_session.
        with sv.prepare_or_wait_for_session(server.target,
                                            config=config) as sess:
            # with sv.managed_session(server.target) as sess:

            if sv.is_chief and is_sync:
                sv.start_queue_runners(sess, [chief_queue_runner])
                sess.run(init_token_op)

            step = 0

            progressbar = trange(num_batches * FLAGS.epochs)
            last_step = 0

            # Start TensorBoard on the chief worker
            if sv.is_chief:
                cmd = 'tensorboard --logdir={}'.format(CHECKPOINT_DIRECTORY)
                # os.setsid puts TensorBoard in its own process group so the
                # whole group can be terminated with os.killpg at the end.
                tb_process = subprocess.Popen(cmd,
                                              stdout=subprocess.PIPE,
                                              shell=True,
                                              preexec_fn=os.setsid)

            while (not sv.should_stop()) and (step <
                                              (num_batches * FLAGS.epochs)):

                batch_idx = step % num_batches  # Which batch is the epoch?

                data = epoch[batch_idx, 0]
                labels = epoch[batch_idx, 1]

                # For n workers, break up the batch into n sections
                # Send each worker a different section of the batch
                data_range = int(batch_size / len(worker_hosts))
                start = data_range * task_index
                end = start + data_range

                feed_dict = {imgs: data[start:end], msks: labels[start:end]}

                history, loss_v, dice_v, step = sess.run(
                    [train_op, loss_value, dice_value, global_step],
                    feed_dict=feed_dict)

                # Print summary only on chief
                if sv.is_chief:

                    summary = sess.run(summary_op, feed_dict=feed_dict)
                    sv.summary_computed(sess, summary)  # Update the summary

                    # Calculate metric on test dataset every epoch
                    if (batch_idx == 0) and (step > num_batches):

                        (loss_v_test, dice_v_test, sens_v_test,
                         spec_v_test) = _average_test_metrics(
                             sess,
                             [loss_value, dice_value, sensitivity_value,
                              specificity_value],
                             imgs, msks, imgs_test, msks_test, batch_size)

                        print("\nEpoch {} of {}: TEST DATASET\nloss = {:.4f}\nDice = {:.4f}\n"
                              "Sensitivity = {:.4f}\nSpecificity = {:.4f}"
                              .format((step // num_batches), FLAGS.epochs,
                                      loss_v_test, dice_v_test, sens_v_test,
                                      spec_v_test))

                        # Add our test summary metrics to TensorBoard
                        sv.summary_computed(
                            sess,
                            sess.run(test_loss_summary,
                                     feed_dict={test_loss_value: loss_v_test}))
                        sv.summary_computed(
                            sess,
                            sess.run(test_dice_summary,
                                     feed_dict={test_dice_value: dice_v_test}))
                        sv.summary_computed(
                            sess,
                            sess.run(test_sens_summary,
                                     feed_dict={
                                         test_sensitivity_value: sens_v_test
                                     }))
                        sv.summary_computed(
                            sess,
                            sess.run(test_spec_summary,
                                     feed_dict={
                                         test_specificity_value: spec_v_test
                                     }))

                        saver.save(
                            sess,
                            CHECKPOINT_DIRECTORY + "/last_good_model.cpkt")

                # Shuffle every epoch
                if (batch_idx == 0) and (step > num_batches):

                    print("Shuffling epoch")
                    epoch = get_epoch(batch_size, imgs_train, msks_train)

                # Print the loss and dice metric in the progress bar.
                progressbar.set_description(
                    "(loss={:.4f}, dice={:.4f})".format(loss_v, dice_v))
                progressbar.update(step - last_step)
                last_step = step

            # Perform the final test set metric
            if sv.is_chief:

                loss_v_test, dice_v_test = _average_test_metrics(
                    sess, [loss_value, dice_value],
                    imgs, msks, imgs_test, msks_test, batch_size)

                print("\nEpoch {} of {}: Test loss = {:.4f}, Test Dice = {:.4f}"
                      .format((step // num_batches), FLAGS.epochs,
                              loss_v_test, dice_v_test))

                sv.summary_computed(
                    sess,
                    sess.run(test_loss_summary,
                             feed_dict={test_loss_value: loss_v_test}))
                sv.summary_computed(
                    sess,
                    sess.run(test_dice_summary,
                             feed_dict={test_dice_value: dice_v_test}))

                saver.save(sess,
                           CHECKPOINT_DIRECTORY + "/last_good_model.cpkt")

            if sv.is_chief:
                # Save the final model as protobuf for TensorFlow Serving
                export_model(sess, imgs, preds)

                # Stop TensorBoard process
                os.killpg(os.getpgid(tb_process.pid), signal.SIGTERM)

            # Send a signal to the ps when done by simply updating a queue in
            # the shared graph
            for op in enq_ops:
                # Send the "work completed" signal to the parameter server
                sess.run(op)

        print("\n\nFinished work on this node.")
        import time
        time.sleep(3)  # Sleep for 3 seconds then exit

        sv.request_stop()
def get_activations(data, model, config, sample_size=None):
    """Collect per-filter activations over the training data.

    This function goes over the data, one by one (batch size = 1), getting the
    max-pooled ngrams/activations, slot activations, and organizes them into a
    dict object for the "model interpretation" functions for the purpose of
    capturing the semantic meaning of each filter and calculating thresholds.

    Args:
        data: Mapping providing ``"train_x"``, ``"train_y"``,
            ``"idx_to_word"`` and ``"word_to_idx"`` (which must contain the
            ``'@@PAD@@'`` token).
        model: Module whose call result exposes ``'ngram_indices'`` and
            ``'logits'``, and which offers ``get_filters()`` (one
            ``(weights, bias)`` pair per ngram size) and ``get_embeddings()``.
        config: Mapping providing ``"ngram_sizes"``, ``"num_filters"``,
            ``"embedding_dim"`` and the ``"cuda"`` flag.
        sample_size: Forwarded to ``get_epoch`` as ``num_examples``.

    Returns:
        A dict with three sub-dicts keyed by filter name ``"w<size>.f<idx>"``:
        ``"slot_activations"`` (NumPy array, one row of per-slot dot products
        per example), ``"chosen_ngrams_by_filter"`` (list of token lists) and
        ``"predicted_class"`` (NumPy array of predicted class ids).
    """
    model.eval()
    epoch_x, epoch_y, lengths_x = get_epoch(data["train_x"],
                                            data["train_y"],
                                            1,
                                            is_train=False,
                                            num_examples=sample_size)

    ngram_sizes = config["ngram_sizes"]
    embedding_dim = config["embedding_dim"]

    interpretation_info = {
        "slot_activations": {},
        "chosen_ngrams_by_filter": {},
        "predicted_class": {}
    }
    for ngram_size in ngram_sizes:
        for filter_ix in range(config["num_filters"]):
            fname = "w" + str(ngram_size) + ".f" + str(filter_ix)
            interpretation_info["slot_activations"][fname] = []
            interpretation_info["chosen_ngrams_by_filter"][fname] = []
            interpretation_info["predicted_class"][fname] = []

    # Loop invariants: padding on each side of the sentence and its token id.
    pad_len = max(ngram_sizes) - 1
    pad_idx = data['word_to_idx']['@@PAD@@']

    # Pure inference: no autograd graph is needed for the forward passes or
    # for the dot products against the filter weights.
    with torch.no_grad():
        # The model is not modified here, so filters and embeddings are
        # fetched once instead of once per example / per filter.
        filters = model.get_filters()
        embeddings = model.get_embeddings()

        for batch_x, batch_y, _length_x in zip(epoch_x, epoch_y, lengths_x):
            batch_x = torch.LongTensor(batch_x)
            if config["cuda"]:
                batch_x = batch_x.cuda()

            out = model(batch_x)
            logits = out['logits']

            indexed_seq = [int(x) for x in batch_x[0]]
            str_seq = [data["idx_to_word"][w] for w in indexed_seq]
            prediction = int(logits.squeeze().max(0)[1].item())

            # reshape(-1) instead of squeeze(): stays iterable even when there
            # is a single filter per ngram size.
            ngram_indices = [[int(x) for x in indices.reshape(-1)]
                             for indices in out['ngram_indices']]

            seq = ['@@PAD@@'] * pad_len + str_seq + ['@@PAD@@'] * pad_len
            indexed_seq_padded = ([pad_idx] * pad_len + indexed_seq
                                  + [pad_idx] * pad_len)

            for w_size_ix, w_size in enumerate(ngram_sizes):
                weights, _bias = filters[w_size_ix]
                for jx, ngram_ix in enumerate(ngram_indices[w_size_ix]):
                    indexed_ngram = indexed_seq_padded[ngram_ix:ngram_ix + w_size]

                    # Split filter jx into one embedding-sized window per slot.
                    windows = [weights[jx][k:k + embedding_dim]
                               for k in range(0, weights.size()[1], embedding_dim)]
                    ngram_embeddings = [embeddings[k] for k in indexed_ngram]
                    assert len(windows) == len(ngram_embeddings)

                    # Slot activation = filter window . token embedding.
                    # Summed over slots, plus the filter bias, this should
                    # give the pre-ReLU pooled activation of the filter.
                    slot_acts = [float(torch.dot(window, emb))
                                 for window, emb in zip(windows, ngram_embeddings)]

                    fname = "w" + str(w_size) + ".f" + str(jx)
                    interpretation_info["slot_activations"][fname].append(slot_acts)
                    interpretation_info["chosen_ngrams_by_filter"][fname].append(
                        seq[ngram_ix:ngram_ix + w_size])
                    interpretation_info["predicted_class"][fname].append(prediction)

            del batch_x, out

    for fname in interpretation_info["slot_activations"]:
        interpretation_info["slot_activations"][fname] = np.array(
            interpretation_info["slot_activations"][fname])
        interpretation_info["predicted_class"][fname] = np.array(
            interpretation_info["predicted_class"][fname])

    return interpretation_info
def interpret_predictions(data, model, config):
    """Get a list of prediction interpretations.

    Examples are processed one at a time (batch size = 1). Each instance in
    the returned list is a dict containing:
        * The input sentence (``"sentence"``)
        * The gold label (``"gold"``, ``"gold_str"``)
        * The predicted label (``"prediction"``, ``"prediction_str"``)
        * For each filter, under the key ``"w<size>.f<idx>"``:
            - ``"chosen_ngram"`` / ``"chosen_ngram_span"``: the ngram chosen
              by max-pooling and its ``[start, end]`` span in the padded
              sentence
            - ``"activation"``: the ngram's activation at the max-pooling
              layer (AFTER adding the filter bias + AFTER a ReLU layer)
            - ``"slot_activations"``: the slot activation vector for the ngram

    Args:
        data: Mapping providing ``"pred_x"``, ``"pred_y"``, ``"idx_to_word"``
            and ``"word_to_idx"`` (which must contain the ``'@@PAD@@'`` token).
        model: Module whose call result exposes ``'ngram_indices'``,
            ``'activations_filters_pooled'`` and ``'logits'``, and which
            offers ``get_filters()`` and ``get_embeddings()``.
        config: Mapping providing ``"ngram_sizes"``, ``"embedding_dim"``,
            ``"class_to_str"`` (keyed by the class id as a string) and the
            ``"cuda"`` flag.

    Returns:
        The list of per-example interpretation dicts described above.
    """
    model.eval()
    epoch_x, epoch_y, lengths_x = get_epoch(data["pred_x"], data["pred_y"], 1,
                                            is_train=False)

    ngram_sizes = config["ngram_sizes"]
    embedding_dim = config["embedding_dim"]
    class_to_str = config['class_to_str']

    # Loop invariants: padding on each side of the sentence and its token id.
    pad_len = max(ngram_sizes) - 1
    pad_idx = data['word_to_idx']['@@PAD@@']

    prediction_info = []
    # Pure inference: no autograd graph is needed for the forward passes or
    # for the dot products against the filter weights.
    with torch.no_grad():
        # The model is not modified here, so filters and embeddings are
        # fetched once instead of once per example / per filter.
        filters = model.get_filters()
        embeddings = model.get_embeddings()

        for batch_x, batch_y, _length_x in zip(epoch_x, epoch_y, lengths_x):
            batch_x = torch.LongTensor(batch_x)
            batch_y = torch.LongTensor(batch_y)
            if config["cuda"]:
                batch_x, batch_y = batch_x.cuda(), batch_y.cuda()

            out = model(batch_x)
            logits = out['logits']

            indexed_seq = [int(x) for x in batch_x[0]]
            str_seq = [data["idx_to_word"][w] for w in indexed_seq]

            prediction = int(logits.squeeze().max(0)[1].item())
            gold = int(batch_y)  # valid because the batch holds one label

            pinfo = {
                "sentence": str_seq,
                "gold": gold,
                "gold_str": class_to_str[str(gold)],
                "prediction": prediction,
                "prediction_str": class_to_str[str(prediction)],
            }

            # reshape(-1) instead of squeeze(): stays iterable even when there
            # is a single filter per ngram size.
            ngram_indices = [[int(x) for x in indices.reshape(-1)]
                             for indices in out['ngram_indices']]
            pooled_vals = [[float(x) for x in p.reshape(-1)]
                           for p in out['activations_filters_pooled']]

            seq = ['@@PAD@@'] * pad_len + str_seq + ['@@PAD@@'] * pad_len
            indexed_seq_padded = ([pad_idx] * pad_len + indexed_seq
                                  + [pad_idx] * pad_len)

            for ngram_len_idx, ngram_len in enumerate(ngram_sizes):
                weights, _bias = filters[ngram_len_idx]
                for jx, ngram_ix in enumerate(ngram_indices[ngram_len_idx]):
                    ngram_end = ngram_ix + ngram_len
                    indexed_ngram = indexed_seq_padded[ngram_ix:ngram_end]

                    # Split filter jx into one embedding-sized window per slot.
                    windows = [weights[jx][k:k + embedding_dim]
                               for k in range(0, weights.size()[1], embedding_dim)]
                    ngram_embeddings = [embeddings[k] for k in indexed_ngram]

                    # Slot activation = filter window . token embedding.
                    word_values = [float(torch.dot(window, emb))
                                   for window, emb in zip(windows, ngram_embeddings)]

                    fname = "w" + str(ngram_len) + ".f" + str(jx)
                    pinfo[fname] = {
                        "chosen_ngram_span": [ngram_ix, ngram_end],
                        "chosen_ngram": seq[ngram_ix:ngram_end],
                        "slot_activations": word_values,
                        "activation": pooled_vals[ngram_len_idx][jx],
                    }

            prediction_info.append(pinfo)
            del batch_x, batch_y, out

    return prediction_info