Code example #1
    def _unpack_model_batch_prediction(self,
                                       batch,
                                       coerce_tree=False) -> List:
        """
        Interpret the prediction result for one batch.

        Set coerce_tree=True to ensure that each output forms a tree
        (via a minimum spanning tree over the rank order).
        """
        out_dict = self.model(**batch)
        pred_matrix = out_dict["pred_matrix"]

        batch_interpretation = []
        for es in range(len(pred_matrix)):
            essay_pred = tonp(pred_matrix[es])

            # decoding using simple argmax
            essay_pred = np.argmax(essay_pred, axis=-1)

            # convert each absolute target index into a relative offset
            dist_interpretation = []
            for i in range(len(essay_pred)):
                dist_interpretation.append(essay_pred[i] - i)

            # check whether the output forms a tree
            rep = TreeBuilder(dist_interpretation)
            if not rep.is_tree() and coerce_tree:
                # run a MINIMUM spanning tree over the rank order:
                # ranks act as edge weights, and a lower rank number
                # (i.e., higher probability) is better
                attn_matrix = np.array(tonp(pred_matrix[es]))
                rank_order = get_rank_order(attn_matrix)
                dist_interpretation = run_MST(rank_order,
                                              rank_order,
                                              verdict="min")

            # add the decoding result to the batch result
            batch_interpretation.append(dist_interpretation)
        return batch_interpretation
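TreeBuilder itself is not shown in these snippets. For intuition, here is a minimal sketch of the kind of check its is_tree() is assumed to perform, given that links are encoded as relative offsets (entry i is the offset from sentence i to its parent, and a root points to itself with offset 0); the repository's actual implementation may differ:

def links_form_tree(dists):
    """Sketch of the tree check TreeBuilder.is_tree() is assumed to do.

    dists[i] is the relative offset from sentence i to its parent; a
    root points to itself with offset 0.  The links form a tree iff
    there is exactly one root, every link stays in range, and following
    parents never revisits a node (no cycles).
    """
    n = len(dists)
    if list(dists).count(0) != 1:
        return False  # zero or multiple roots
    for start in range(n):
        seen = set()
        node = start
        while dists[node] != 0:
            if node in seen:
                return False  # cycle
            seen.add(node)
            node += dists[node]
            if not 0 <= node < n:
                return False  # link points outside the essay
    return True

For example, links_form_tree([0, -1, -2, -1]) is True, while links_form_tree([0, 1, -1]) is False because sentences 1 and 2 link to each other in a cycle.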
Code example #2
def structured_output_quality(links) -> Tuple[List, float, float, float, List]:
    """
    Infer component labels automatically from the structure.

    Returns:
        component labels per essay, the ratio of outputs that form a tree,
        the average tree depth, the average leaf proportion, and the list
        of all tree depths; the averages are computed over tree-shaped
        outputs only.
    """
    component_labels = []
    n_trees = 0
    sum_depth = 0
    sum_leaf_prop = 0
    all_depths = []

    n_essays = len(links)

    for i in range(n_essays):
        rep = TreeBuilder(links[i])
        component_labels.append(rep.auto_component_labels(AC_breakdown=True))

        if rep.is_tree():
            n_trees += 1

            # depth and leaf proportion are defined only when the
            # output forms a tree
            depth, leaf_prop = rep.tree_depth_and_leaf_proportion()
            sum_depth += depth
            all_depths.append(depth)
            sum_leaf_prop += leaf_prop

    if n_trees == 0:
        # avoid division by zero when no output forms a tree
        return component_labels, 0.0, 0.0, 0.0, all_depths

    return (component_labels, n_trees / n_essays, sum_depth / n_trees,
            sum_leaf_prop / n_trees, all_depths)
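A hypothetical usage sketch; it assumes the repository's TreeBuilder is importable and that links are offset lists as produced by _unpack_model_batch_prediction above (the toy values are illustrative):

links = [
    [0, -1, -2],  # a tree: sentences 1 and 2 both attach to sentence 0
    [0, 1, -1],   # not a tree: sentences 1 and 2 link to each other
]
labels, tree_ratio, avg_depth, avg_leaf_prop, depths = structured_output_quality(links)
print(tree_ratio)  # 0.5 -- only the first essay forms a tree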
Code example #3
def f1_per_depth(dist_gold: List, dist_prediction: List, max_depth: int):
    """
    Find at which depth prediction mismatches happen (when the output forms a tree)

    Args:
        dist_gold (List): gold answer per essay
        dist_prediction (List): predicted answer per essay
        max_depth (int): max structure depth in the dataset

    Returns:
        List: F1 score per depth, with 0.0 for depths absent from the report
    """
    gold_all_depth = []
    pred_all_depth = []

    for i in range(len(dist_gold)):
        rep_gold = TreeBuilder(dist_gold[i])
        rep_pred = TreeBuilder(dist_prediction[i])

        # compare node depths only when the prediction forms a tree
        if rep_pred.is_tree():
            gold_all_depth.append(rep_gold.node_depths())
            pred_all_depth.append(rep_pred.node_depths())

    gold_all_depth_flat = flatten_list(gold_all_depth)
    pred_all_depth_flat = flatten_list(pred_all_depth)

    print("=== Depth prediction performance when output forms a tree ===")
    print(
        classification_report(y_true=gold_all_depth_flat,
                              y_pred=pred_all_depth_flat,
                              digits=3))
    report = classification_report(y_true=gold_all_depth_flat,
                                   y_pred=pred_all_depth_flat,
                                   output_dict=True)
    f1s = []
    for i in range(max_depth):
        try:
            f1s.append(report[str(i)]['f1-score'])
        except KeyError:
            # no node at this depth appeared in the evaluation
            f1s.append(0.0)

    return f1s
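The per-depth lookup works because scikit-learn's classification_report with output_dict=True keys each class by its string label, so depth d is looked up as report[str(d)]. A toy illustration with made-up depth labels:

from sklearn.metrics import classification_report

y_true = [0, 1, 1, 2, 2, 2]  # gold node depths (toy data)
y_pred = [0, 1, 2, 2, 2, 1]  # predicted node depths (toy data)
report = classification_report(y_true=y_true, y_pred=y_pred, output_dict=True)
print(report["1"]["f1-score"])  # 0.5 -- F1 for nodes at depth 1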
Code example #4
    def _unpack_model_batch_prediction(self,
                                       batch,
                                       coerce_tree=False) -> Tuple[List, List]:
        """
        Interpret the prediction result for one batch.

        Set coerce_tree=True to ensure that the predicted links form a
        tree (via a maximum spanning tree over the softmax probabilities).
        """
        out_dict = self.model(**batch)
        pred_linking_softmax = tonp(out_dict["pred_linking_softmax"])
        pred_node_labelling_softmax = tonp(
            out_dict["pred_node_labelling_softmax"])

        linking_preds = []
        node_labelling_preds = []
        for es in range(len(pred_linking_softmax)):
            essay_linking = []
            essay_labelling = []
            max_seq_len = batch["seq_len"][es]

            # simple decoding using argmax; s is the index of the
            # current sentence in the essay
            for s in range(max_seq_len):
                # constrained argmax for linking: take the highest-probability
                # offset whose target still falls inside the essay
                curr_link_softmax = pred_linking_softmax[es][s]
                ranked_pred = sorted(enumerate(curr_link_softmax),
                                     key=lambda x: x[1],
                                     reverse=True)
                for i in range(len(ranked_pred)):
                    tmp_dist = self.dist_idx_to_dist(ranked_pred[i][0])
                    if 0 <= tmp_dist + s <= max_seq_len - 1:
                        pred_dist = tmp_dist
                        break

                # plain argmax for node labelling
                curr_label_softmax = pred_node_labelling_softmax[es][s]
                pred_idx = np.argmax(curr_label_softmax)
                pred_label = self.component_idx_to_label(pred_idx)

                # essay-level result
                essay_linking.append(pred_dist)
                essay_labelling.append(pred_label)

            # check whether the output forms a tree
            rep = TreeBuilder(essay_linking)
            if not rep.is_tree() and coerce_tree:
                # attn_matrix[i][j] denotes the probability that sentence i
                # links to sentence j (j as the target)
                attn_matrix = []
                for s in range(max_seq_len):
                    curr_pred = pred_linking_softmax[es][s]

                    # map each offset class back to the probability of each
                    # possible target sentence in the text
                    row_pred = [0] * max_seq_len
                    for i in range(len(curr_pred)):
                        temp_dist = self.dist_idx_to_dist(i)
                        if 0 <= temp_dist + s <= max_seq_len - 1:
                            row_pred[temp_dist + s] = curr_pred[i]

                    attn_matrix.append(row_pred)

                # run a MAXIMUM spanning tree: the softmax probabilities act
                # as edge weights, and higher probability is better
                attn_matrix = np.array(attn_matrix)
                rank_order = get_rank_order(attn_matrix)
                essay_linking = run_MST(rank_order, attn_matrix, verdict="max")

            # batch-level result
            linking_preds.append(essay_linking)
            node_labelling_preds.append(essay_labelling)

        return linking_preds, node_labelling_preds
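The constrained argmax relies on self.dist_idx_to_dist, which is not shown. A minimal standalone sketch, assuming the linking classes enumerate the relative offsets -max_dist, ..., +max_dist in order (the real mapping may differ):

MAX_DIST = 3  # hypothetical offset range; the real model's is not shown

def dist_idx_to_dist(idx, max_dist=MAX_DIST):
    # class index 0 maps to offset -max_dist, the last index to +max_dist
    return idx - max_dist

def constrained_argmax(link_softmax, s, seq_len):
    """Return the highest-probability offset whose target stays in the essay."""
    ranked = sorted(enumerate(link_softmax), key=lambda x: x[1], reverse=True)
    for idx, _ in ranked:
        dist = dist_idx_to_dist(idx)
        if 0 <= dist + s <= seq_len - 1:
            return dist
    return 0  # fall back to a self-link; unreachable if offset 0 is a class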
Code example #5
    def _unpack_model_batch_prediction(self,
                                       batch,
                                       coerce_tree=False) -> List:
        """
        Interpret the prediction result for one batch.

        Set coerce_tree=True to ensure that each output forms a tree
        (via a maximum spanning tree over the softmax probabilities).
        """
        out_dict = self.model(**batch)
        pred_softmax = tonp(out_dict["pred_softmax"])

        batch_interpretation = []
        for es in range(len(pred_softmax)):
            essay_interpretation = []
            max_seq_len = batch["seq_len"][es]

            # simple decoding using argmax; s is the index of the
            # current sentence in the essay
            for s in range(max_seq_len):
                curr_pred = pred_softmax[es][s]

                # constrained argmax: take the highest-probability offset
                # whose target still falls inside the essay
                ranked_pred = sorted(enumerate(curr_pred),
                                     key=lambda x: x[1],
                                     reverse=True)
                for i in range(len(ranked_pred)):
                    tmp_dist = self.dist_idx_to_dist(ranked_pred[i][0])
                    if 0 <= tmp_dist + s <= max_seq_len - 1:
                        pred_dist = tmp_dist
                        break

                essay_interpretation.append(pred_dist)

            # check whether the output forms a tree
            rep = TreeBuilder(essay_interpretation)
            if not rep.is_tree() and coerce_tree:
                # attn_matrix[i][j] denotes the probability that sentence i
                # links to sentence j (j as the target)
                attn_matrix = []
                for s in range(max_seq_len):
                    curr_pred = pred_softmax[es][s]

                    # map each offset class back to the probability of each
                    # possible target sentence in the text
                    row_pred = [0] * max_seq_len
                    for i in range(len(curr_pred)):
                        temp_dist = self.dist_idx_to_dist(i)
                        if 0 <= temp_dist + s <= max_seq_len - 1:
                            row_pred[temp_dist + s] = curr_pred[i]

                    attn_matrix.append(row_pred)

                # run a MAXIMUM spanning tree: the softmax probabilities act
                # as edge weights, and higher probability is better
                attn_matrix = np.array(attn_matrix)
                rank_order = get_rank_order(attn_matrix)
                essay_interpretation = run_MST(rank_order,
                                               attn_matrix,
                                               verdict="max")

            batch_interpretation.append(essay_interpretation)

        return batch_interpretation
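run_MST and get_rank_order come from the repository and are not shown. As a sketch of what the coercion step is assumed to accomplish, a similar effect can be obtained with networkx's maximum spanning arborescence (Chu-Liu/Edmonds): treat attn_matrix[i][j] as the weight of attaching sentence i under parent j, pick the tree with the largest total weight, and convert it back to relative offsets. Self-link probabilities are ignored here for simplicity, which the original code may handle differently:

import networkx as nx

def coerce_to_tree(attn_matrix):
    """Force the links to form a tree via a maximum spanning arborescence.

    attn_matrix[i][j] is the probability that sentence i links to
    sentence j.  Edges are added parent -> child so that the resulting
    arborescence gives every sentence exactly one parent.
    """
    n = len(attn_matrix)
    graph = nx.DiGraph()
    for child in range(n):
        for parent in range(n):
            if parent != child:
                graph.add_edge(parent, child, weight=attn_matrix[child][parent])
    tree = nx.maximum_spanning_arborescence(graph)
    dists = [0] * n  # the implicit root keeps offset 0, i.e. a self-link
    for parent, child in tree.edges():
        dists[child] = parent - child  # back to relative offsets
    return dists

For instance, coerce_to_tree([[0.1, 0.9], [0.8, 0.2]]) returns [1, 0]: sentence 0 attaches to sentence 1, which becomes the root.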