Example #1
def run_test(model_path: RichPath, test_data_path: RichPath, type_lattice_path: RichPath, alias_metadata_path: RichPath, print_predictions: bool = False):
    test_run_id = "_".join(
        [time.strftime("%Y-%m-%d-%H-%M-%S"), str(os.getpid())])

    test_hyper_overrides = {
        'run_id': test_run_id,
        "dropout_keep_rate": 1.0,
    }

    test_data_chunks = test_data_path.get_filtered_files_in_dir('*gz')

    # Restore model
    model = model_restore_helper.restore(
        model_path, is_train=False, hyper_overrides=test_hyper_overrides)

    evaluator = TypePredictionEvaluator(type_lattice_path, alias_metadata_path)

    all_annotations = model.annotate(test_data_chunks)
    for annotation in all_annotations:
        if ignore_type_annotation(annotation.original_annotation):
            continue
        predicted_annotation = max(annotation.predicted_annotation_logprob_dist,
                                   key=lambda x: annotation.predicted_annotation_logprob_dist[x])
        if print_predictions:
            print(
                f'{annotation.provenance} -- {annotation.name}: {annotation.original_annotation} -> {predicted_annotation} ({math.exp(annotation.predicted_annotation_logprob_dist[predicted_annotation])*100:.1f}%)')
        evaluator.add_sample(ground_truth=annotation.original_annotation,
                             predicted_dist=annotation.predicted_annotation_logprob_dist)

    print(json.dumps(evaluator.metrics(), indent=2, sort_keys=True))
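For orientation, a call to run_test could look like the sketch below. This is not part of the original example: every path is a placeholder, and RichPath.create is only assumed to be available here because Example #11 uses it the same way.

from dpu_utils.utils import RichPath  # assumed import for this sketch

# Hypothetical invocation of the run_test above; all paths are placeholders.
run_test(model_path=RichPath.create("trained-model.pkl.gz"),
         test_data_path=RichPath.create("data/test/"),
         type_lattice_path=RichPath.create("metadata/type-lattice.json.gz"),
         alias_metadata_path=RichPath.create("metadata/type-aliases.json.gz"),
         print_predictions=True)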
Example #2
    def _load_data_from_sample(hyperparameters: Dict[str, Any],
                               metadata: Dict[str, Any],
                               raw_sample: Dict[str, Any],
                               result_holder: Dict[str, Any],
                               is_train: bool = True) -> bool:
        keep_sample = super(Path2Annotation,
                            Path2Annotation)._load_data_from_sample(
                                hyperparameters, metadata, raw_sample,
                                result_holder, is_train)
        if not keep_sample:
            return False

        target_class = []
        for node_idx, annotation_data in raw_sample['supernodes'].items():
            annotation = annotation_data['annotation']
            if is_train and ignore_type_annotation(annotation):
                continue

            target_class.append(
                TypeClassificationModel._get_idx_for_type(
                    annotation, metadata, hyperparameters))

        result_holder['variable_target_class'] = np.array(target_class,
                                                          dtype=np.uint16)
        return len(target_class) > 0
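The _load_data_from_sample methods in these examples all iterate over raw_sample['supernodes']. Judging only from how the snippets access it, an entry of that mapping might look roughly like the sketch below; the keys are node indices serialized as strings (some loaders call int(node_idx) on them), and only the 'annotation' field is evidenced here, so anything else would be a guess.

# Illustrative sketch of the assumed supernode layout, not taken from the project.
example_supernodes = {
    "17": {"annotation": "List[int]"},
    "42": {"annotation": "typing.Any"},  # presumably filtered out by ignore_type_annotation
}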
Example #3
    def _load_data_from_sample(hyperparameters: Dict[str, Any],
                               metadata: Dict[str, Any],
                               raw_sample: Dict[str, Any],
                               result_holder: Dict[str, Any],
                               is_train: bool = True) -> bool:
        keep_sample = super(Graph2HybridMetric,
                            Graph2HybridMetric)._load_data_from_sample(
                                hyperparameters, metadata, raw_sample,
                                result_holder, is_train)
        if not keep_sample:
            return False

        target_node_idxs, target_class, target_class_id = [], [], []
        for node_idx, annotation_data in raw_sample['supernodes'].items():
            node_idx = int(node_idx)
            annotation = annotation_data['annotation']
            if is_train and ignore_type_annotation(annotation):
                continue
            target_node_idxs.append(node_idx)
            target_class.append(annotation)
            target_class_id.append(
                TypeClassificationModel._get_idx_for_type(
                    annotation, metadata, hyperparameters))

        result_holder['target_node_idxs'] = np.array(target_node_idxs,
                                                     dtype=np.uint16)
        result_holder['target_type'] = target_class
        result_holder['variable_target_class'] = np.array(target_class_id,
                                                          dtype=np.uint16)
        return len(target_node_idxs) > 0
Example #4
def errorenous_predictions():
    # Yields predictions whose top-ranked type differs from the ground-truth
    # annotation; predictions_jsonl is presumably a RichPath defined in the
    # enclosing scope.
    for prediction in predictions_jsonl.read_by_file_suffix():
        if ignore_type_annotation(prediction['original_annotation']):
            continue
        top_prediction = prediction["predicted_annotation_logprob_dist"][0][0]
        if top_prediction != prediction['original_annotation']:
            yield prediction
Example #5
def _load_metadata_from_sample(hyperparameters: Dict[str, Any],
                               raw_sample: Dict[str, Any],
                               raw_metadata: Dict[str, Any]) -> None:
    annotations = (
        annotation_data['annotation']
        for annotation_data in raw_sample['supernodes'].values()
        if not ignore_type_annotation(annotation_data['annotation']))
    if ('strip_type_parameters' in hyperparameters
            and hyperparameters['strip_type_parameters']):
        annotations = (t.split("[")[0] for t in annotations)
    raw_metadata['type_occurences_counter'].update(annotations)
Example #6
def run_test(preds_path: RichPath,
             type_lattice_path: RichPath,
             alias_metadata_path: RichPath,
             result_path: str,
             top_n=10,
             print_predictions: bool = False):
    # test_run_id = "_".join(
    #     [time.strftime("%Y-%m-%d-%H-%M-%S"), str(os.getpid())])

    # test_hyper_overrides = {
    #     'run_id': test_run_id,
    #     "dropout_keep_rate": 1.0,
    # }

    #test_data_chunks = test_data_path.get_filtered_files_in_dir('*gz')

    # Restore model
    # model = model_restore_helper.restore(
    #     model_path, is_train=False, hyper_overrides=test_hyper_overrides)
    # all_annotations = model.annotate(test_data_chunks)

    evaluator = TypePredictionEvaluator(type_lattice_path,
                                        alias_metadata_path,
                                        top_n=top_n)

    for i, annotation in enumerate(load_jsonl_gz(preds_path)):
        if ignore_type_annotation(annotation['original_annotation']):
            continue

        if print_predictions:
            logprob_dist = annotation['predicted_annotation_logprob_dist']
            predicted_annotation = max(logprob_dist, key=logprob_dist.get)
            # annotation is a plain dict loaded from the jsonl file, so use
            # key access rather than attribute access here.
            print(
                f"{annotation['provenance']} -- {annotation['name']}: "
                f"{annotation['original_annotation']} -> {predicted_annotation} "
                f"({math.exp(logprob_dist[predicted_annotation])*100:.1f}%)")
        evaluator.add_sample(
            ground_truth=annotation['original_annotation'],
            predicted_dist=annotation['predicted_annotation_logprob_dist'])

        # if i > 1500: break

    with open(result_path, 'w') as f:
        f.write(json.dumps(evaluator.metrics(), indent=2, sort_keys=True))
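The records read by this script come from a predictions .jsonl.gz file. Note that this example treats predicted_annotation_logprob_dist as a mapping from type to log-probability (it takes max(..., key=...) and then indexes by type), whereas Examples #4 and #11 index the same field as a list of (type, log-probability) pairs; the hypothetical record below follows the mapping interpretation used here, with invented values.

# Hypothetical prediction record as this script reads it; all values are made up.
example_prediction = {
    "provenance": "some_repo/some_file.py:123",  # placeholder
    "name": "user_id",                           # placeholder
    "original_annotation": "int",
    "predicted_annotation_logprob_dist": {"int": -0.05, "str": -3.2, "typing.Any": -4.1},
}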
Example #7
    def _load_data_from_sample(hyperparameters: Dict[str, Any],
                               metadata: Dict[str, Any],
                               raw_sample: Dict[str, Any],
                               result_holder: Dict[str, Any],
                               is_train: bool = True) -> bool:
        keep_sample = super(Path2Metric, Path2Metric)._load_data_from_sample(
            hyperparameters, metadata, raw_sample, result_holder, is_train)
        if not keep_sample:
            return False

        target_class = []
        for node_idx, annotation_data in raw_sample['supernodes'].items():
            annotation = annotation_data['annotation']
            if is_train and ignore_type_annotation(annotation):
                continue
            target_class.append(annotation)

        result_holder['target_type'] = target_class
        return len(target_class) > 0
Example #8
        def representation_iter():
            data_chunk_iterator = (r.read_by_file_suffix() for r in data_paths)
            with self.__model.sess.as_default():
                for raw_data_chunk in data_chunk_iterator:
                    for raw_sample in raw_data_chunk:
                        loaded_sample = {}
                        use_example = self.__model._load_data_from_sample(
                            self.__model.hyperparameters,
                            self.__model.metadata,
                            raw_sample=raw_sample,
                            result_holder=loaded_sample,
                            is_train=False)
                        if not use_example:
                            continue

                        _, fetches = self.__model._run_epoch_in_batches(
                            loaded_sample,
                            '(indexing)',
                            is_train=False,
                            quiet=True,
                            additional_fetch_dict={
                                'target_representations':
                                self.__model.ops['target_representations']
                            })
                        target_representations = fetches[
                            'target_representations']

                        idx = 0
                        for node_idx, annotation_data in raw_sample[
                                'supernodes'].items():
                            node_idx = int(node_idx)
                            if 'ignored_supernodes' in loaded_sample and node_idx in loaded_sample[
                                    'ignored_supernodes']:
                                continue

                            annotation = annotation_data['annotation']
                            if ignore_type_annotation(annotation):
                                idx += 1
                                continue

                            yield target_representations[idx], annotation
                            idx += 1
Example #9
    def _load_data_from_sample(hyperparameters: Dict[str, Any],
                               metadata: Dict[str, Any],
                               raw_sample: Dict[str, Any],
                               result_holder: Dict[str, Any],
                               is_train: bool = True) -> bool:
        keep_sample = super(Sequence2HybridMetric,
                            Sequence2HybridMetric)._load_data_from_sample(
                                hyperparameters, metadata, raw_sample,
                                result_holder, is_train)
        if not keep_sample:
            return False

        token_node_idxs = set(raw_sample['token-sequence'])
        node_idx_to_supernode_idx = {}  #  type: Dict[int, int]
        for from_idx, to_idxs in raw_sample['edges']['OCCURRENCE_OF'].items():
            from_idx = int(from_idx)
            if from_idx not in token_node_idxs:
                # Some supernodes do not have an associated token. Such nodes are attributes
                if str(from_idx) in raw_sample['edges']['CHILD']:
                    right_token_idx = max(
                        raw_sample['edges']['CHILD'][str(from_idx)])
                    assert right_token_idx in token_node_idxs
                    from_idx = right_token_idx
                else:
                    continue
            for to_idx in to_idxs:
                node_idx_to_supernode_idx[from_idx] = to_idx

        supernodes_with_related_nodes = set(node_idx_to_supernode_idx.values())

        variable_types = []  # type: List[str]
        variable_type_idxs = []  # type: List[int]
        ignored_supernodes = set()
        supernode_idxs_to_annotated_variable_idx = {}  # type: Dict[int, int]
        for node_idx, supernode_data in raw_sample['supernodes'].items():
            node_idx = int(node_idx)
            annotation = supernode_data['annotation']
            if ignore_type_annotation(annotation) and is_train:
                ignored_supernodes.add(node_idx)
                continue
            if node_idx not in supernodes_with_related_nodes:
                ignored_supernodes.add(node_idx)
                continue

            variable_idx = len(supernode_idxs_to_annotated_variable_idx)
            variable_types.append(annotation)
            variable_type_idxs.append(
                TypeClassificationModel._get_idx_for_type(
                    annotation, metadata, hyperparameters))
            supernode_idxs_to_annotated_variable_idx[node_idx] = variable_idx

        if len(variable_types) == 0:
            return False

        token_idx, variable_idx = [], []

        def create_token_sequence():
            for i, node_idx in enumerate(raw_sample['token-sequence']):
                supernode_idx = node_idx_to_supernode_idx.get(node_idx)
                if supernode_idx is not None:
                    annotated_variable_idxs = supernode_idxs_to_annotated_variable_idx.get(
                        supernode_idx)
                    if annotated_variable_idxs is not None:
                        token_idx.append(i)
                        variable_idx.append(annotated_variable_idxs)
                yield raw_sample['nodes'][node_idx]

        token_sequence = list(create_token_sequence())
        if len(token_sequence) > hyperparameters['max_seq_len']:
            return False

        # Did we see at least one token per variable?
        assert len(np.unique(variable_idx)) == len(variable_types)

        TokenEmbedder.load_data_from_sample('token', metadata, token_sequence,
                                            result_holder, hyperparameters,
                                            is_train)

        result_holder['sequence_length'] = len(token_sequence)
        result_holder['variable_token_idxs'] = np.array(token_idx,
                                                        dtype=np.uint32)
        result_holder['variable_idxs'] = np.array(variable_idx,
                                                  dtype=np.uint32)
        result_holder['target_type'] = variable_types
        result_holder['variable_target_class'] = np.array(variable_type_idxs,
                                                          dtype=np.uint32)
        result_holder['ignored_supernodes'] = ignored_supernodes
        return keep_sample
Example #10
def ignore_annotation(annotation_str: Optional[str]) -> bool:
    if annotation_str is None:
        return False
    return ignore_type_annotation(annotation_str)
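Every example on this page filters annotations through ignore_type_annotation, whose definition is not part of the listing. As a rough mental model only, it can be read as a membership test against a set of uninformative annotations; the sketch below is an assumption, and the actual logic and the exact set used in the project may differ.

# Minimal sketch, not the project's implementation; the set below is an assumption.
UNINFORMATIVE_ANNOTATIONS = {"", "None", "Any", "typing.Any"}

def ignore_type_annotation(annotation_str: str) -> bool:
    # Annotations that carry no useful type information are excluded from
    # training targets and from evaluation in the examples above.
    return annotation_str in UNINFORMATIVE_ANNOTATIONS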
Example #11
def run(arguments):
    azure_info_path = arguments.get('--azure-info', None)
    predictions_jsonl = RichPath.create(arguments['PREDICTIONS_PATH'],
                                        azure_info_path)
    type_lattice_path = RichPath.create(arguments['TYPE_LATTICE_PATH'],
                                        azure_info_path)
    alias_metadata_path = RichPath.create(arguments['ALIAS_METADATA'],
                                          azure_info_path)

    type_lattice = TypeLattice(type_lattice_path, 'typing.Any',
                               alias_metadata_path)

    exact_match_metric = MetricForPrecRec('Exact Match')
    up_to_parametric_match_metric = MetricForPrecRec(
        'Match Up to Parametric Type')
    type_correct_metric = MetricForPrecRec('Type Neutral')

    for prediction in predictions_jsonl.read_as_jsonl():
        ground_truth = prediction['original_annotation']
        if ignore_type_annotation(ground_truth):
            continue
        top_prediction, prediction_logprob = prediction[
            "predicted_annotation_logprob_dist"][0]

        is_exact_match = type_lattice.are_same_type(ground_truth,
                                                    top_prediction)
        exact_match_metric.add(is_exact_match, prediction_logprob)

        correct_up_to_parametric = is_exact_match or type_lattice.are_same_type(
            ground_truth.split("[")[0],
            top_prediction.split("[")[0])
        up_to_parametric_match_metric.add(correct_up_to_parametric,
                                          prediction_logprob)

        if is_exact_match:
            type_correct_metric.add(True, prediction_logprob)
        elif ground_truth in type_lattice and top_prediction in type_lattice:
            ground_truth_node_idx = type_lattice.id_of(ground_truth)
            predicted_node_idx = type_lattice.id_of(top_prediction)

            intersection_nodes_idx = type_lattice.intersect(
                ground_truth_node_idx, predicted_node_idx)
            is_ground_subtype_of_predicted = ground_truth_node_idx in intersection_nodes_idx
            type_correct_metric.add(is_ground_subtype_of_predicted,
                                    prediction_logprob)

    pr_curves = [
        exact_match_metric.get_pr_curve() + ('r--', exact_match_metric),
        up_to_parametric_match_metric.get_pr_curve() +
        ('b:', up_to_parametric_match_metric),
        type_correct_metric.get_pr_curve() + ('k-', type_correct_metric)
    ]

    fig = plt.figure(figsize=(5.5, 2.5))
    ax = fig.add_subplot(111)
    for name, precision, recall, _, style, metric in pr_curves:
        print(name, metric.num_elements, recall, precision)
        print(f'{name}: {precision[0]:%}')
        ax.plot(recall, precision, style, label=name, linewidth=2)
    plt.grid()
    plt.xlim([0, 1.005])
    plt.ylim([0, 1.005])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.legend()
    plt.tight_layout()
    plt.savefig('test.pdf', dpi=300)


def compute(predictions_path: RichPath,
            type_lattice_path: RichPath,
            alias_metadata_path: RichPath,
            json_preds: str,
            top_n: int = 10):

    with open(json_preds) as f:
        json_data = json.load(f)['per_type_stats']

    type_lattice = TypeLattice(type_lattice_path, 'typing.Any',
                               alias_metadata_path)
    data = predictions_path.read_as_jsonl()

    total_per_kind = defaultdict(int)
    correct_per_kind = defaultdict(int)
    up_to_parameteric_per_kind = defaultdict(int)
    # type_consistency_per_kind = defaultdict(int)
    # total_per_kind_for_consistency = defaultdict(int)

    common_tr = 100
    corr_exact_per_kind = defaultdict(lambda: defaultdict(int))
    corr_param_per_kind = defaultdict(lambda: defaultdict(int))
    corr_exact = defaultdict(int)
    corr_param = defaultdict(int)

    # For recall and precision calculation
    pred_annotation = []
    true_annotation = []
    pred_per_type = defaultdict(list)
    true_per_type = defaultdict(list)

    for prediction in data:
        annotation_type = prediction['annotation_type']
        original_annotation = prediction['original_annotation']
        if (ignore_type_annotation(original_annotation)
                or annotation_type in ('variable', 'imported')):
            continue

        true_annotation.append(original_annotation)
        true_per_type[annotation_type].append(original_annotation)
        total_per_kind[annotation_type] += 1
        #top_predicted = prediction['predicted_annotation_logprob_dist'][0][0]
        #print(prediction['predicted_annotation_logprob_dist'])
        is_exact_match = False
        is_accurate_utpt = False

        for t, s in prediction['predicted_annotation_logprob_dist'][:top_n]:
            is_exact_match = type_lattice.are_same_type(original_annotation, t)
            if is_exact_match:
                correct_per_kind[annotation_type] += 1
                corr_exact['all'] += 1
                if json_data[original_annotation]['count'] > common_tr:
                    corr_exact_per_kind[annotation_type]['corr_common'] += 1
                    corr_exact['corr_common'] += 1
                else:
                    corr_exact_per_kind[annotation_type]['corr_rare'] += 1
                    corr_exact['corr_rare'] += 1

                pred_annotation.append(t)
                pred_per_type[annotation_type].append(t)
                break

        for t, s in prediction['predicted_annotation_logprob_dist'][:top_n]:
            is_accurate_utpt = type_lattice.are_same_type(
                original_annotation.split("[")[0],
                t.split("[")[0])
            if is_accurate_utpt:
                up_to_parameteric_per_kind[annotation_type] += 1
                corr_param['all'] += 1
                if json_data[original_annotation]['count'] > common_tr:
                    corr_param_per_kind[annotation_type]['corr_common'] += 1
                    corr_param['corr_common'] += 1
                else:
                    corr_param_per_kind[annotation_type]['corr_rare'] += 1
                    corr_param['corr_rare'] += 1
                break

        if not is_exact_match:
            if json_data[original_annotation]['count'] > common_tr:
                corr_exact_per_kind[annotation_type]['incorr_common'] += 1
                corr_exact['incorr_common'] += 1
            else:
                corr_exact_per_kind[annotation_type]['incorr_rare'] += 1
                corr_exact['incorr_rare'] += 1
            pred_annotation.append(
                prediction['predicted_annotation_logprob_dist'][0][0])
            pred_per_type[annotation_type].append(
                prediction['predicted_annotation_logprob_dist'][0][0])

        if not is_accurate_utpt:
            if json_data[original_annotation]['count'] > common_tr:
                corr_param_per_kind[annotation_type]['incorr_common'] += 1
                corr_param['incorr_common'] += 1
            else:
                corr_param_per_kind[annotation_type]['incorr_rare'] += 1
                corr_param['incorr_rare'] += 1
        # if is_exact_match:
        #     type_consistency_per_kind[annotation_type] += 1
        #     total_per_kind_for_consistency[annotation_type] += 1
        # elif original_annotation in type_lattice and top_predicted in type_lattice:
        #     # Type Consistency
        #     ground_truth_node_idx = type_lattice.id_of(original_annotation)
        #     predicted_node_idx = type_lattice.id_of(top_predicted)

        #     intersection_nodes_idx = type_lattice.intersect(ground_truth_node_idx, predicted_node_idx)
        #     is_ground_subtype_of_predicted = ground_truth_node_idx in intersection_nodes_idx
        #     total_per_kind_for_consistency[annotation_type] += 1
        #     if is_ground_subtype_of_predicted:
        #         type_consistency_per_kind[annotation_type] += 1

    print('== Exact Match')
    for annot_type in total_per_kind:
        try:
            print(
                f'{annot_type}: {correct_per_kind[annot_type] / total_per_kind[annot_type] * 100.0 :.2f} ({correct_per_kind[annot_type]}/{total_per_kind[annot_type]})'
            )
            print(
                f"Common - {annot_type}: {corr_exact_per_kind[annot_type]['corr_common'] / (corr_exact_per_kind[annot_type]['corr_common'] + corr_exact_per_kind[annot_type]['incorr_common']) * 100.0 :.2f}"
            )
            print(
                f"Rare - {annot_type}: {corr_exact_per_kind[annot_type]['corr_rare'] / (corr_exact_per_kind[annot_type]['corr_rare'] + corr_exact_per_kind[annot_type]['incorr_rare']) * 100.0 :.2f}"
            )
            r = classification_report(true_per_type[annot_type],
                                      pred_per_type[annot_type],
                                      output_dict=True)
            print(
                f"{annot_type}: F1: {r['weighted avg']['f1-score'] * 100:.2f} R: {r['weighted avg']['recall'] * 100:.2f} P: {r['weighted avg']['precision'] * 100:.2f}"
            )
            print("******************************")
        except ZeroDivisionError:
            pass
    print('== Up to Parametric')
    for annot_type in total_per_kind:
        try:
            print(
                f'{annot_type}: {up_to_parameteric_per_kind[annot_type] / total_per_kind[annot_type] * 100.0 :.2f} ({up_to_parameteric_per_kind[annot_type]}/{total_per_kind[annot_type]})'
            )
            print(
                f"Common - {annot_type}: {corr_param_per_kind[annot_type]['corr_common'] / (corr_param_per_kind[annot_type]['corr_common'] + corr_param_per_kind[annot_type]['incorr_common']) * 100.0 :.2f}"
            )
            print(
                f"Rare - {annot_type}: {corr_param_per_kind[annot_type]['corr_rare'] / (corr_param_per_kind[annot_type]['corr_rare'] + corr_param_per_kind[annot_type]['incorr_rare']) * 100.0 :.2f}"
            )
            print("******************************")
        except ZeroDivisionError:
            pass

    r = classification_report(true_annotation,
                              pred_annotation,
                              output_dict=True)
    print("Precision: %.2f" % (r['weighted avg']['precision'] * 100))
    print("Recall: %.2f" % (r['weighted avg']['recall'] * 100))
    print("F1-score: %.2f" % (r['weighted avg']['f1-score'] * 100))
    print("******************************")
    print(
        f"Exact - All: {corr_exact['all']/len(true_annotation)*100.0:.2f} common: {corr_exact['corr_common'] / (corr_exact['corr_common'] + corr_exact['incorr_common'])*100.0:.2f} rare: {corr_exact['corr_rare'] / (corr_exact['corr_rare'] + corr_exact['incorr_rare'])*100.0:.2f}"
    )
    print(
        f"Parameteric - All: {corr_param['all']/len(true_annotation)*100.0:.2f} common: {corr_param['corr_common'] / (corr_param['corr_common'] + corr_param['incorr_common'])*100.0:.2f} rare: {corr_param['corr_rare'] / (corr_param['corr_rare'] + corr_param['incorr_rare'])*100.0:.2f}"
    )