Beispiel #1
0
    def tensorize(
        self, datapoint: VarNamingSample
    ) -> Optional[TensorizedVarNamingSample]:
        """Convert a variable-naming sample into its tensorized form.

        Node labels are read from the string-keyed ``"NodeLabels"`` mapping in
        index order ``0..N-1``. Every node whose label equals ``VAR_TOKEN`` is
        passed to the graph as a reference node under ``"var_node_idxs"``.

        Args:
            datapoint: Sample providing ``"ContextGraph"`` (with
                ``"NodeLabels"`` and ``"Edges"``) and ``"name"``.

        Returns:
            A ``TensorizedVarNamingSample`` holding the tensorized graph and
            the decoder-tensorized name, or ``None`` when the GNN model's
            ``tensorize`` returns ``None``.
        """
        context_graph = datapoint["ContextGraph"]
        target_name = datapoint["name"]
        labels_by_id = context_graph["NodeLabels"]

        # Keys of the label mapping are stringified node indices.
        variable_positions = []
        for node_key, label in labels_by_id.items():
            if label == VAR_TOKEN:
                variable_positions.append(int(node_key))

        # Rebuild the labels as a list ordered by node index.
        ordered_labels = []
        for node_idx in range(len(labels_by_id)):
            ordered_labels.append(labels_by_id[str(node_idx)])

        graph_data = GraphData(
            node_information=ordered_labels,
            edges=context_graph["Edges"],
            reference_nodes={"var_node_idxs": variable_positions},
        )

        # NOTE(review): GraphData is instantiated directly above, so this
        # guard appears unreachable unless GraphData can evaluate to None;
        # confirm before removing.
        if graph_data is None:
            return None

        self.__add_subtoken_vocab_nodes(graph_data)
        encoded_graph = self.__gnn_model.tensorize(graph_data)
        if encoded_graph is None:
            return None

        # The name is only tensorized once the graph is known to be usable.
        encoded_name = self.__decoder_model.tensorize(target_name)
        return TensorizedVarNamingSample(
            graph=encoded_graph,
            target_idxs=encoded_name,
        )
Beispiel #2
0
 def update_metadata_from(self, datapoint: VarMisuseSample) -> None:
     """Feed one variable-misuse sample into the GNN model's metadata pass.

     Each node is described by a ``(label, False)`` tuple, with labels read
     from the string-keyed ``"NodeLabels"`` mapping in index order. The
     graph is passed through ``__add_subtoken_vocab_nodes`` before being
     handed to the GNN model.

     Args:
         datapoint: Sample providing ``"ContextGraph"`` with
             ``"NodeLabels"`` and ``"Edges"``.
     """
     context_graph = datapoint["ContextGraph"]
     labels_by_id = context_graph["NodeLabels"]

     # Pair every label with a constant False flag.
     node_entries = []
     for node_idx in range(len(labels_by_id)):
         node_entries.append((labels_by_id[str(node_idx)], False))

     metadata_graph = GraphData(
         node_information=node_entries,
         edges=context_graph["Edges"],
         # Reference nodes are not needed for metadata loading.
         reference_nodes={},
     )
     self.__add_subtoken_vocab_nodes(metadata_graph)
     self.__gnn_model.update_metadata_from(metadata_graph)
Beispiel #3
0
 def update_metadata_from(self, datapoint: PPIGraphSample) -> None:
     """Feed one PPI graph sample into the metadata pass.

     The sample's adjacency lists are exposed to the GNN model as edge
     types named ``"e0"``, ``"e1"``, ... in enumeration order. The second
     dimension of ``datapoint.node_labels`` is recorded as the number of
     target labels on first use and asserted to match on later calls.

     Args:
         datapoint: Sample exposing ``node_features``, ``adjacency_lists``
             and ``node_labels`` (the latter must have a ``shape``).
     """
     node_features = datapoint.node_features

     # One edge type per adjacency list, keyed by its position.
     edges_by_type = {}
     for type_idx, adjacency in enumerate(datapoint.adjacency_lists):
         edges_by_type[f"e{type_idx}"] = adjacency

     metadata_graph = GraphData(
         node_information=node_features,
         edges=edges_by_type,
         reference_nodes={},
     )
     self.__gnn_model.update_metadata_from(metadata_graph)

     if self.__num_target_labels is not None:
         # Every sample must agree with the label count seen first.
         assert self.__num_target_labels == datapoint.node_labels.shape[1]
         return
     self.__num_target_labels = datapoint.node_labels.shape[1]
Beispiel #4
0
    def update_metadata_from(self, datapoint: CodeGraph2Seq) -> None:
        """Feed one graph-to-sequence sample into both models' metadata pass.

        Node labels are lower-cased once and shared by both models: the GNN
        model sees them as node information, and the decoder model sees the
        labels selected by ``"backbone_sequence"`` as its input elements.

        Args:
            datapoint: Sample providing ``"node_labels"``, ``"edges"``,
                ``"backbone_sequence"`` and ``"method_name"``.
        """
        lowered_labels = []
        for label in datapoint["node_labels"]:
            lowered_labels.append(label.lower())

        encoder_input = GraphData(
            node_information=lowered_labels,
            edges=datapoint["edges"],
            reference_nodes={"backbone_nodes": datapoint["backbone_sequence"]},
        )
        self.__gnn_model.update_metadata_from(encoder_input)

        # Built only after the GNN update, mirroring the encoder-first order.
        backbone_labels = [
            lowered_labels[node_idx]
            for node_idx in datapoint["backbone_sequence"]
        ]
        decoder_input = DecoderData(
            input_elements=backbone_labels,
            target_data=datapoint["method_name"],
        )
        self.__decoder_model.update_metadata_from(decoder_input)
Beispiel #5
0
    def tensorize(
        self, datapoint: VarMisuseSample
    ) -> Optional[TensorizedVarMisuseSample]:
        """Convert a variable-misuse sample into its tensorized form.

        Each node is described by ``(label, is_candidate)``, where
        ``is_candidate`` tells whether the node index is the
        ``"SymbolDummyNode"`` of any entry in ``"SymbolCandidates"``. The
        candidate nodes and the slot node are passed as reference nodes.

        Args:
            datapoint: Sample providing ``"ContextGraph"`` (with
                ``"NodeLabels"`` and ``"Edges"``), ``"SymbolCandidates"``
                (entries with ``"IsCorrect"`` and ``"SymbolDummyNode"``) and
                ``"SlotDummyNode"``.

        Returns:
            A ``TensorizedVarMisuseSample`` with the tensorized graph, the
            position of the correct candidate and the candidate count, or
            ``None`` when the GNN model's ``tensorize`` returns ``None``.

        Raises:
            AssertionError: If the number of candidates flagged
                ``"IsCorrect"`` is not exactly one.
        """
        context_graph = datapoint["ContextGraph"]
        candidates = datapoint["SymbolCandidates"]

        # Positions (within the candidate list) of the correct symbols.
        correct_positions = []
        for position, candidate in enumerate(candidates):
            if candidate["IsCorrect"]:
                correct_positions.append(position)
        assert len(correct_positions) == 1

        candidate_nodes = [entry["SymbolDummyNode"] for entry in candidates]
        candidate_id_set = set(candidate_nodes)

        # Keys of the label mapping are stringified node indices.
        labels_by_id = context_graph["NodeLabels"]
        annotated_nodes = []
        for node_idx in range(len(labels_by_id)):
            annotated_nodes.append(
                (labels_by_id[str(node_idx)], node_idx in candidate_id_set))

        graph_data = GraphData(
            node_information=annotated_nodes,
            edges=context_graph["Edges"],
            reference_nodes={
                "candidate_nodes": candidate_nodes,
                "slot_node_idx": [datapoint["SlotDummyNode"]],
            },
        )

        # NOTE(review): GraphData is instantiated directly above, so this
        # guard appears unreachable unless GraphData can evaluate to None;
        # confirm before removing.
        if graph_data is None:
            return None

        self.__add_subtoken_vocab_nodes(graph_data)
        encoded_graph = self.__gnn_model.tensorize(graph_data)
        if encoded_graph is None:
            return None

        return TensorizedVarMisuseSample(
            graph=encoded_graph,
            target_candidate_node_idx=correct_positions[0],
            num_candidate_nodes=len(candidates),
        )
Beispiel #6
0
    def tensorize(
        self, datapoint: CodeGraph2Seq
    ) -> Optional[TensorizedGraph2Seq]:
        """Convert a graph-to-sequence sample into its tensorized form.

        Node labels are lower-cased once. The GNN model tensorizes the graph
        first; only when that succeeds does the decoder model tensorize the
        labels selected by ``"backbone_sequence"`` together with the
        ``"method_name"`` target.

        Args:
            datapoint: Sample providing ``"node_labels"``, ``"edges"``,
                ``"backbone_sequence"`` and ``"method_name"``.

        Returns:
            A ``TensorizedGraph2Seq`` pairing encoder and decoder data, or
            ``None`` when the GNN model's ``tensorize`` returns ``None``.
        """
        lowered_labels = []
        for label in datapoint["node_labels"]:
            lowered_labels.append(label.lower())

        encoder_input = GraphData(
            node_information=lowered_labels,
            edges=datapoint["edges"],
            reference_nodes={"backbone_nodes": datapoint["backbone_sequence"]},
        )
        encoder_tensors = self.__gnn_model.tensorize(encoder_input)
        if encoder_tensors is None:
            # Discard the example; no decoder work is done for it.
            return None

        backbone_labels = [
            lowered_labels[node_idx]
            for node_idx in datapoint["backbone_sequence"]
        ]
        decoder_input = DecoderData(
            input_elements=backbone_labels,
            target_data=datapoint["method_name"],
        )
        decoder_tensors = self.__decoder_model.tensorize(decoder_input)

        return TensorizedGraph2Seq(
            encoder_data=encoder_tensors,
            decoder_data=decoder_tensors,
        )