예제 #1
0
def get_all_combination_Vikas_EdgeAPPROACH_withCoverage_best_graph(pred_labels_over_runs, performance_runs, gold_labels):
    """Pick the best meta-subgraph of runs using the edge-based score.

    Enumerates every subset of runs of size 2 .. len(runs).  Each subset is
    scored as the mean, over all member pairs (i, j), of the edge weight
    (perf_i + perf_j) / overlap_ij, multiplied by the fraction of gold
    labels covered by the union of the members' predictions.

    Args:
        pred_labels_over_runs: dict mapping run key -> prediction labels
            (assumed binary indicator vectors -- TODO confirm; sum() below
            counts positives).
        performance_runs: dict mapping run key -> scalar performance score.
        gold_labels: gold label indicator vector (same convention).

    Returns:
        The run-key tuple of the highest-scoring subgraph.

    Raises:
        ZeroDivisionError: if calculate_overlap_labels returns 0 for some
            pair, or sum(gold_labels) is 0.
        ValueError: if fewer than two runs are supplied (empty score list).
    """
    runs = list(pred_labels_over_runs.keys())
    # All subsets of runs of size 2, 3, ..., len(runs).
    meta_subgraphs = []
    for i in range(len(runs)-1):
        meta_subgraphs += list(combinations(runs, i+2))
    print ("len of meta subgraphs are ", len(meta_subgraphs))
    meta_graph_scores = []
    meta_graph_coverage_scores = []

    for meta_sub_graph1 in meta_subgraphs:
        current_subgraph_score = []
        for ik1, rk1 in enumerate(meta_sub_graph1[:-1]):

            if ik1 == 0:  ## initializing the coverage list
                prediction_coverage = pred_labels_over_runs[rk1]

            # Inner slice [ik1+1:] pairs rk1 with every later member, so the
            # two loops together visit all C(M, 2) pairs exactly once.
            for rk2 in meta_sub_graph1[ik1+1:]:
                # BUGFIX: parenthesize the performance sum.  Previously the
                # expression was p1 + (p2 / overlap) because '/' binds tighter
                # than '+'; the intended edge weight is (p1 + p2) / overlap.
                current_subgraph_score.append((performance_runs[rk1] + performance_runs[rk2]) / float(calculate_overlap_labels(pred_labels_over_runs[rk1], pred_labels_over_runs[rk2])))
                prediction_coverage = get_union(prediction_coverage, pred_labels_over_runs[rk2])

        # Fraction of gold positives covered by the union of member predictions.
        final_coverage = sum(get_intersection(prediction_coverage, gold_labels))/float(sum(gold_labels))
        meta_graph_coverage_scores.append(final_coverage)
        meta_graph_scores.append( (sum(current_subgraph_score)/float(len(current_subgraph_score)) ) * final_coverage )  ## taking average of subgraph scores

    print ("the len of meta graph scores are : ", len(meta_graph_scores))
    best_sub_graph_index = meta_graph_scores.index(max(meta_graph_scores))
    print ("best subgraph is: ", meta_subgraphs[best_sub_graph_index], max(meta_graph_scores),meta_graph_coverage_scores[best_sub_graph_index])

    return meta_subgraphs[best_sub_graph_index]
예제 #2
0
def get_all_combination_withCoverage_best_graph_Cand_boost_withIDF(KB_terms, performance_runs, Ques_terms, Ans_terms, subgraph_size, IDF_vals):  ## gold_labels_list is QA terms and pred_labels_over_runs is justification terms
    """Select the best meta-subgraph of runs, boosted by IDF-weighted Q/A coverage.

    Enumerates subsets of runs of sizes 2 .. subgraph_size+1 and scores each by
    (1 + avg_perf / (1 + avg_overlap)) * (1 + ans_coverage) * (1 + query_coverage),
    where coverage terms are IDF-weighted fractions of question/answer terms
    covered by the union of the members' KB terms.

    Args:
        KB_terms: dict mapping run key -> collection of KB/justification terms.
        performance_runs: dict mapping run key -> scalar performance score.
        Ques_terms: list of question terms.
        Ans_terms: list of answer terms.
        subgraph_size: controls enumerated subset sizes (i+2 for i in
            range(subgraph_size), i.e. sizes 2 .. subgraph_size+1).
        IDF_vals: IDF weights consumed by get_intersection_withIDF.

    Returns:
        On success: (best_subgraph_tuple, avg_overlap, query_coverage,
        ans_coverage) for the arg-max subgraph.  Returns the string "Crashed"
        when the score list is empty (max() raises ValueError).
    """
    runs = list(performance_runs.keys())
    All_QA_terms = list(set(Ques_terms + Ans_terms ))
    # print("the gold_labels list looks like: ", runs)
    meta_subgraphs = []

    # All subsets of runs of size 2 .. subgraph_size+1.
    for i in range(subgraph_size):
        meta_subgraphs += list(combinations(runs, i+2))

    # for i in range(subgraph_size):  ## for taking best subgraph amongst subgraphs of size 3,4,5
    #     meta_subgraphs += list(combinations(runs, i+3))

    # meta_subgraphs += list(combinations(runs, subgraph_size))

    meta_graph_scores = []
    meta_graph_coverage_scores = []
    meta_graph_ans_coverage_scores = []
    meta_graph_overlap_scores = []

    for meta_sub_graph1 in meta_subgraphs:
        current_subgraph_overlap = []
        current_subgraph_perf = []

        for ik1, rk1 in enumerate(meta_sub_graph1[:-1]):

            if ik1 == 0:  ## initializing the coverage list
                prediction_coverage = KB_terms[rk1]

            current_subgraph_perf.append(performance_runs[rk1])
            # NOTE(review): the slice ends at -1, so the LAST member never
            # appears as rk2 -- pairs involving it are skipped and its KB
            # terms never enter prediction_coverage.  That is NOT all
            # M-choose-2 pairs as the original comment claimed; the max(1, .)
            # guards below suggest this may be deliberate -- confirm intent.
            for rk2 in meta_sub_graph1[ik1 + 1:-1]:  ##### suspect: see NOTE above (claimed to be M C 2)

                current_subgraph_overlap.append(float(calculate_overlap_QA_terms(KB_terms[rk1], KB_terms[rk2], All_QA_terms)))
                prediction_coverage = get_union(prediction_coverage, KB_terms[rk2])

        avg_score = sum(current_subgraph_perf) / float(len(current_subgraph_perf))
        # max(1, ...) guards size-2 subgraphs, whose overlap list stays empty
        # because of the [ik1+1:-1] slice above.
        avg_overlap = sum(current_subgraph_overlap) / float(max(1,len(current_subgraph_overlap)))
        # print ("the ")
        # IDF-weighted fraction of question / answer terms covered by the
        # union of member KB terms; max(1, ...) guards empty term lists.
        final_query_coverage = get_intersection_withIDF(prediction_coverage, Ques_terms, IDF_vals) / max(1,float(len(Ques_terms)))
        final_ans_coverage = get_intersection_withIDF(prediction_coverage, Ans_terms, IDF_vals) / max(1,float(len(Ans_terms)))

        meta_graph_coverage_scores.append(final_query_coverage)
        meta_graph_ans_coverage_scores.append(final_ans_coverage)
        meta_graph_overlap_scores.append(avg_overlap)
        # meta_graph_scores.append( avg_score  * final_ans_coverage * final_query_coverage)  ## taking average of subgraph scores
        # if subgraph_size>2:
        #    print ("the avg score, overlap and coverage looks like: ", avg_score, avg_overlap, final_query_coverage, final_ans_coverage)
        # meta_graph_scores.append( (avg_score/float(1+avg_overlap))  * (1+1*final_ans_coverage) * (1+final_query_coverage) )  ## taking average of subgraph scores
        meta_graph_scores.append( (1+avg_score/float(1+avg_overlap)) *  (1+1*final_ans_coverage) * (1+final_query_coverage) )  ## taking average of subgraph scores ##  *  * # 1+avg_overlap

    # print ("the len of meta graph scores are : ", len(meta_graph_scores))
    try:
        best_sub_graph_index = meta_graph_scores.index(max(meta_graph_scores))
        # print ("checking weather this returns any overlap val or not ", meta_graph_overlap_scores)
        return meta_subgraphs[best_sub_graph_index], meta_graph_overlap_scores[best_sub_graph_index], meta_graph_coverage_scores[best_sub_graph_index], meta_graph_ans_coverage_scores[best_sub_graph_index]
    except ValueError:
        # max() on an empty list -- no subgraphs could be formed/scored.
        return "Crashed"
예제 #3
0
def get_all_combination_withCoverage_best_graph(
    KB_terms, performance_runs, Ques_terms, Ans_terms
):  ## gold_labels_list is QA terms and pred_labels_over_runs is justification terms
    """Select the best size-4 subgraph of runs by avg performance * QA-term coverage.

    Enumerates every 4-run subset, scores each as (mean member performance) *
    (fraction of question+answer terms covered by the union of member KB
    terms), and returns the arg-max subset.

    Args:
        KB_terms: dict mapping run key -> collection of KB/justification terms.
        performance_runs: dict mapping run key -> scalar performance score.
        Ques_terms: list of question terms.
        Ans_terms: list of answer terms.

    Returns:
        The run-key tuple of the best subgraph, or the string "Crashed" when
        fewer than 4 runs exist (empty score list makes max() raise
        ValueError).
    """
    runs = list(performance_runs.keys())
    gold_labels = Ques_terms + Ans_terms
    # print("the gold_labels list looks like: ", runs)
    meta_subgraphs = []

    # for i in range(len(runs)-1):
    #     meta_subgraphs += list(combinations(runs, i+2))

    # Subgraph size is fixed at 4 here (variable-size variants commented out).
    meta_subgraphs += list(combinations(runs, 4))

    meta_graph_scores = []
    meta_graph_coverage_scores = []

    for meta_sub_graph1 in meta_subgraphs:
        current_subgraph_overlap = []
        current_subgraph_perf = []

        for ik1, rk1 in enumerate(meta_sub_graph1[:-1]):

            if ik1 == 0:  ## initializing the coverage list
                prediction_coverage = KB_terms[rk1]

            # NOTE(review): the last member's performance is never appended
            # (outer loop excludes it and it is also cut from the inner slice)
            current_subgraph_perf.append(performance_runs[rk1])
            # NOTE(review): slice ends at -1, so pairs involving the LAST
            # member are skipped and its KB terms never join the coverage
            # union -- NOT all M-choose-2 pairs as the original comment said.
            # With size fixed at 4 the overlap list is still non-empty, so no
            # ZeroDivisionError below; confirm whether the exclusion is
            # intentional.
            for rk2 in meta_sub_graph1[
                    ik1 + 1:-1]:  ##### suspect: see NOTE above (claimed M C 2)

                current_subgraph_overlap.append(
                    float(calculate_overlap(KB_terms[rk1], KB_terms[rk2])))
                prediction_coverage = get_union(prediction_coverage,
                                                KB_terms[rk2])

        avg_score = sum(current_subgraph_perf) / float(
            len(current_subgraph_perf))
        avg_overlap = sum(current_subgraph_overlap) / float(
            len(current_subgraph_overlap))
        # print ("the ")
        # Fraction of Q+A terms covered by the union of member KB terms.
        final_coverage = len(get_intersection(
            prediction_coverage, gold_labels)) / float(len(gold_labels))
        meta_graph_coverage_scores.append(final_coverage)
        # meta_graph_scores.append( (avg_score/float(avg_overlap+1)) * final_coverage )  ## taking average of subgraph scores
        # NOTE(review): avg_overlap is computed but unused in the final score.
        meta_graph_scores.append(
            avg_score * final_coverage)  ## taking average of subgraph scores

    # print ("the len of meta graph scores are : ", len(meta_graph_scores))
    try:
        best_sub_graph_index = meta_graph_scores.index(max(meta_graph_scores))
        return meta_subgraphs[best_sub_graph_index]
    except ValueError:
        # max() on an empty list -- fewer than 4 runs available.
        return "Crashed"
예제 #4
0
def get_best_linear_subgraph(pred_labels_over_runs, performance_runs, gold_labels_list, A1 = 1, A2 =1, A3 =1):  ## this is inclusion of coverage factor with Steve's graph suggestion
    """Return the subgraph of runs maximizing A1*perf + A2*overlap + A3*coverage.

    Every subset of runs of size >= 2 is a candidate.  For each candidate we
    take the mean performance of its members (the traversal visits
    subgraph[:-1], so the last member's performance is not included), the mean
    pairwise prediction overlap over all member pairs, and the fraction of
    gold label indices covered by the union of member predictions, then
    combine them linearly with weights A1, A2, A3.
    """
    run_keys = list(pred_labels_over_runs.keys())
    gold_labels = list(range(len(gold_labels_list)))
    print("the gold_labels list looks like: ", gold_labels)

    # All candidate subsets of runs of size 2 .. len(run_keys).
    candidate_subgraphs = []
    for size_minus_two in range(len(run_keys) - 1):
        candidate_subgraphs.extend(combinations(run_keys, size_minus_two + 2))

    linear_scores = []
    coverage_fracs = []

    for subgraph in candidate_subgraphs:
        pair_overlaps = []
        member_perfs = []
        union_preds = None

        for pos, run_a in enumerate(subgraph[:-1]):
            if union_preds is None:  # seed coverage with the first member's predictions
                union_preds = pred_labels_over_runs[run_a]

            member_perfs.append(performance_runs[run_a])
            # Pair run_a with every later member -- all C(M, 2) pairs in total.
            for run_b in subgraph[pos + 1:]:
                pair_overlaps.append(float(calculate_overlap(pred_labels_over_runs[run_a], pred_labels_over_runs[run_b])))
                union_preds = get_union(union_preds, pred_labels_over_runs[run_b])

        mean_perf = sum(member_perfs) / float(len(member_perfs))
        mean_overlap = sum(pair_overlaps) / float(len(pair_overlaps))
        covered_frac = len(get_intersection(union_preds, gold_labels)) / float(len(gold_labels))

        coverage_fracs.append(covered_frac)
        # Linear combination of the three factors.
        linear_scores.append(A1 * mean_perf + A2 * float(mean_overlap) + A3 * covered_frac)

    print ("the len of meta graph scores are : ", len(linear_scores))
    best_idx = linear_scores.index(max(linear_scores))
    print ("best subgraph from linear regression is: ", candidate_subgraphs[best_idx], max(linear_scores), coverage_fracs[best_idx])

    return candidate_subgraphs[best_idx]
예제 #5
0
def get_all_combination_forN_sizes_withCoverage_best_graph_BECKY_analysis(pred_labels_over_runs, all_prediction_label_runs, performance_runs, gold_labels_list, BiNODE_overlap, mean_score):
    """Per-size subgraph search with ensemble statistics for Becky's analysis.

    For each subgraph size 2 .. len(runs), enumerates all subsets of runs and
    scores each by (avg performance / avg pairwise overlap) * coverage, where
    overlaps come precomputed from BiNODE_overlap.  Also records, per
    candidate: a regression feature row [avg_score, avg_overlap, coverage],
    the ensemble-vs-mean performance delta as a regression label, and the
    majority-voted ensemble predictions.

    Args:
        pred_labels_over_runs: dict run key -> prediction labels used for the
            coverage union.
        all_prediction_label_runs: dict run key -> full prediction labels fed
            to the voting ensemble.
        performance_runs: dict run key -> scalar performance.
        gold_labels_list: gold labels; coverage is measured against the index
            range 0 .. len(gold_labels_list)-1.
        BiNODE_overlap: dict keyed by str(rk1)+str(rk2) -> pairwise overlap.
        mean_score: baseline subtracted from each ensemble performance to
            form the regression label.

    Returns:
        Tuple of (best subgraph per size keyed by size, best-by-1/overlap per
        size, best-by-perf/overlap per size, feature rows, labels, all
        candidate subgraphs, overall best subgraph by (P/O)*C, dict of
        score-sorted ensemble predictions keyed by subgraph size as str).
    """
    runs = list(pred_labels_over_runs.keys())
    best_subgraphs_diff_sizes = {}  ### (P/O)*C
    best_subgraphs_overlaps = {}  ## just 1/O factor
    best_subgraphs_Perf_Over = {}  ## Just (P/O) factor, no coverage

    gold_labels = list(range(len(gold_labels_list)))

    all_subgraphs = []
    feature_x = []
    label_y = []

    Ensemble_predictions_runs = {} ## we are saving this to complete Becky's analysis

    # Running best across ALL sizes for the (P/O)*C criterion.
    POC_score = 0
    POC_subgraph = []

    for i in range(len(runs)-1):
        # All subsets of runs of size i+2 (2, 3, ..., len(runs)).
        meta_subgraphs = list(combinations(runs, i+2))

        meta_graph_scores = []  ### same sequence as above
        meta_graph_Overlap_scores = []
        meta_graph_PERF_Overlap_scores = []

        Ensemble_predictions = []

        meta_graph_coverage_scores = []

        for meta_sub_graph1 in meta_subgraphs:
            current_subgraph_overlap = []
            current_subgraph_perf = []

            for ik1, rk1 in enumerate(meta_sub_graph1[:-1]):

                if ik1 == 0:  ## initializing the coverage list
                   prediction_coverage = pred_labels_over_runs[rk1]

                # NOTE(review): last member's performance never appended
                # (outer loop stops at [:-1]) -- same convention as siblings.
                current_subgraph_perf.append(performance_runs[rk1])
                # Pairs rk1 with every later member: all C(M, 2) pairs total.
                for rk2 in meta_sub_graph1[ik1+1:]:
                    # NOTE(review): str(rk1)+str(rk2) concatenation is
                    # collision-prone for multi-character keys (e.g. "1"+"12"
                    # == "11"+"2") -- confirm run-key format is single-char.
                    current_subgraph_overlap.append (BiNODE_overlap[str(rk1)+str(rk2)] )
                    prediction_coverage = get_union(prediction_coverage, pred_labels_over_runs[rk2])

            avg_score =  sum(current_subgraph_perf)/float(len(current_subgraph_perf))
            avg_overlap =  sum(current_subgraph_overlap)/float(len(current_subgraph_overlap))
            # print ("the ")
            # Fraction of gold label indices covered by the union of member predictions.
            final_coverage = len(get_intersection(prediction_coverage, gold_labels))/float(len(gold_labels))
            meta_graph_coverage_scores.append(final_coverage)
            # Raises ZeroDivisionError if avg_overlap is 0.
            meta_graph_scores.append( (avg_score/float(avg_overlap)) * final_coverage )  ## taking average of subgraph scores

            ############### for linear regression statistics and feature generation
            # feature_x.append([avg_score, 1/float(avg_overlap), final_coverage, avg_score/float(avg_overlap),(avg_score/float(avg_overlap))*final_coverage, avg_score*final_coverage])
            feature_x.append([avg_score, avg_overlap, final_coverage])
            # Majority-vote ensemble over this subgraph's member predictions.
            best_subgraph_preds = {mn1: all_prediction_label_runs[mn1] for mn1 in meta_sub_graph1}
            subgraph_ensemble_performance, current_ensemble_voted_preds = meta_voting_ensemble_BECKY(best_subgraph_preds, gold_labels_list, math.ceil(len(meta_sub_graph1) / 2))
            # print("the subgraph ensemble performance looks like:  ", subgraph_ensemble_performance)
            Ensemble_predictions.append(current_ensemble_voted_preds)
            # Regression label: ensemble performance relative to the baseline.
            label_y.append(subgraph_ensemble_performance - mean_score)
            all_subgraphs.append(meta_sub_graph1)

            ###################

            meta_graph_Overlap_scores.append(1/float(avg_overlap))
            meta_graph_PERF_Overlap_scores.append(avg_score/float(avg_overlap))
        # print ("the len of meta graph scores are : ", len(meta_graph_scores))
        best_sub_graph_index = meta_graph_scores.index(max(meta_graph_scores))
        # Ensemble predictions reordered by ascending (P/O)*C score.
        sorted_index_ens = np.argsort(meta_graph_scores)

        sorted_ensemble_predictions = [Ensemble_predictions[Ens_ind1] for Ens_ind1 in sorted_index_ens]
        # NOTE(review): meta_sub_graph1 is the leftover inner-loop variable;
        # its len happens to equal the current size i+2, but this is fragile.
        Ensemble_predictions_runs.update({str(len(meta_sub_graph1)):sorted_ensemble_predictions})

        # Track the overall best (P/O)*C subgraph across every size.
        if max(meta_graph_scores)>POC_score:
           POC_score = max(meta_graph_scores)
           POC_subgraph =  meta_subgraphs[best_sub_graph_index]

        print ("best subgraph is: ", meta_subgraphs[best_sub_graph_index], max(meta_graph_scores),meta_graph_coverage_scores[best_sub_graph_index])
        best_subgraphs_diff_sizes.update({i+2: meta_subgraphs[best_sub_graph_index]})
        best_subgraphs_overlaps.update({i+2: meta_subgraphs[meta_graph_Overlap_scores.index(max(meta_graph_Overlap_scores))]})
        best_subgraphs_Perf_Over.update({i+2:meta_subgraphs[meta_graph_PERF_Overlap_scores.index(max(meta_graph_PERF_Overlap_scores))]})


    return best_subgraphs_diff_sizes, best_subgraphs_overlaps, best_subgraphs_Perf_Over, feature_x, label_y, all_subgraphs, POC_subgraph, Ensemble_predictions_runs