Ejemplo n.º 1
0
    def greedy_decode(self, data: List[CodeGraph2Seq],
                      trained_network: Graph2SeqModule,
                      device: Any,
                      max_minibatch_size: int = 50,
                      ) -> List[Tuple[List[str], float]]:
        """Greedily decode one output per input sample.

        The samples are tensorized and consumed minibatch by minibatch. For
        every minibatch the GNN of ``trained_network`` is evaluated with
        gradient tracking disabled and the representations of its
        ``"backbone_nodes"`` are handed to the decoder model's own
        ``greedy_decode``.

        Args:
            data: The samples to decode.
            trained_network: Module that provides ``_gnn``, ``_decoder`` and
                ``_get_initial_decoder_states``.
            device: Forwarded unchanged to ``minibatch_iterator``.
            max_minibatch_size: Forwarded to ``minibatch_iterator``. Defaults
                to 50, the value that used to be hard-coded here.

        Returns:
            The decoder outputs of all minibatches, concatenated in iteration
            order. Each entry is annotated as a ``(tokens, float)`` pair; the
            float is presumably a score for the sequence -- confirm against
            the decoder model.

        Raises:
            AssertionError: If the number of decoded entries differs from
                ``len(data)``.
        """
        decoded_sequences = []
        for mb_data, input_data in self.minibatch_iterator(
                self.tensorize_dataset(iter(data), return_input_data=True),
                device,
                max_minibatch_size=max_minibatch_size,
        ):
            # Lower-cased labels of the backbone nodes of every sample in the
            # minibatch, flattened into a single list in sample order.
            input_concrete_values: List[str] = []
            for sample in input_data:
                sample = enforce_not_None(sample)
                input_concrete_values.extend(
                    sample["node_labels"][k].lower()
                    for k in sample["backbone_sequence"])

            # Inference only: no gradients are needed.
            with torch.no_grad():
                gnn_output = trained_network._gnn(
                    **mb_data["encoder_mb_data"])  # type: GnnOutput
                mb_outputs = self.__decoder_model.greedy_decode(
                    input_concrete_values=input_concrete_values,
                    input_memories=gnn_output.output_node_representations[
                        gnn_output.node_idx_references["backbone_nodes"]],
                    input_memories_origin_idx=gnn_output.
                    node_graph_idx_reference["backbone_nodes"],
                    initial_states=trained_network._get_initial_decoder_states(
                        gnn_output),
                    neural_model=trained_network._decoder,
                )
                decoded_sequences.extend(mb_outputs)

        # Every input sample must have produced exactly one decoded entry.
        assert len(decoded_sequences) == len(data), (
            "Decoded %d sequences for %d input samples."
            % (len(decoded_sequences), len(data)))
        return decoded_sequences
Ejemplo n.º 2
0
    def tensorize(
        self, datapoint: GraphData[TNodeData]
    ) -> Optional[TensorizedGraphData[TTensorizedNodeData]]:
        """Convert a graph into its tensorized form, or drop it if too large.

        Args:
            datapoint: The graph to tensorize. Its ``node_information`` and
                ``reference_nodes`` are read here; edges are obtained through
                ``__iterate_edge_types``.

        Returns:
            A ``TensorizedGraphData`` for the graph, or ``None`` when the
            graph has more than ``max_nodes_per_graph`` nodes or more than
            ``max_graph_edges`` edges (a warning is logged in both cases).
        """
        # Check the node limit before doing any per-node work so that
        # oversized graphs are rejected without tensorizing their nodes.
        num_nodes = len(datapoint.node_information)
        if num_nodes > self.max_nodes_per_graph:
            self.LOGGER.warning("Dropping graph with %s nodes.", num_nodes)
            return None

        tensorized_data = TensorizedGraphData(
            adjacency_lists=list(self.__iterate_edge_types(datapoint)),
            node_tensorized_data=[
                enforce_not_None(self.__node_embedding_model.tensorize(ni))
                for ni in datapoint.node_information
            ],
            reference_nodes={
                # A single conversion suffices; wrapping the result in a
                # second np.array() only made an extra copy.
                n: np.array(refs, dtype=np.int32)
                for n, refs in datapoint.reference_nodes.items()
            },
            num_nodes=num_nodes,
        )

        # The edge count is the total length of all adjacency lists.
        num_edges = sum(len(adj) for adj in tensorized_data.adjacency_lists)
        if num_edges > self.max_graph_edges:
            self.LOGGER.warning("Dropping graph with %s edges.", num_edges)
            return None

        return tensorized_data
Ejemplo n.º 3
0
    def tensorize(
        self, datapoint: GraphData[TNodeData, TEdgeData]
    ) -> Optional[TensorizedGraphData[TTensorizedNodeData,
                                      TTensorizedEdgeData]]:
        """Tensorize a graph with optional edge features, or drop it.

        Args:
            datapoint: The graph to tensorize. Its ``node_information``,
                ``edge_features`` and ``reference_nodes`` are read here;
                edges are obtained through ``__iterate_edge_types``.

        Returns:
            The ``TensorizedGraphData`` of the graph, or ``None`` (after
            logging a warning) when the graph has more than
            ``max_nodes_per_graph`` nodes or more than ``max_graph_edges``
            edges.
        """
        node_count = len(datapoint.node_information)
        if node_count > self.max_nodes_per_graph:
            self.LOGGER.warning("Dropping graph with %s nodes." % node_count)
            return None

        # Edge features are tensorized only when an edge embedding model is
        # configured; otherwise the field stays None.
        per_type_edge_features = None
        if self.__edge_embedding_model is not None:
            per_type_edge_features = []
            for known_edge_type in self.__edge_idx_to_type:
                raw_features = datapoint.edge_features.get(known_edge_type)
                if raw_features is None:
                    # The graph has no edges of this type.
                    tensorized_for_type = []
                else:
                    tensorized_for_type = [
                        self.__edge_embedding_model.tensorize(feature)
                        for feature in raw_features
                    ]
                per_type_edge_features.append(tensorized_for_type)

        adjacency = list(self.__iterate_edge_types(datapoint))
        tensorized_nodes = [
            enforce_not_None(self.__node_embedding_model.tensorize(node))
            for node in datapoint.node_information
        ]
        references = {}
        for ref_name, ref_idxs in datapoint.reference_nodes.items():
            references[ref_name] = np.array(ref_idxs, dtype=np.int32)

        result = TensorizedGraphData(
            adjacency_lists=adjacency,
            node_tensorized_data=tensorized_nodes,
            edge_features=per_type_edge_features,
            reference_nodes=references,
            num_nodes=node_count,
        )

        # Total number of edges across all adjacency lists.
        edge_count = 0
        for adj in result.adjacency_lists:
            edge_count += len(adj)
        if edge_count > self.max_graph_edges:
            self.LOGGER.warning("Dropping graph with %s edges." % edge_count)
            return None

        return result
Ejemplo n.º 4
0
    def __convert(
            self,
            typilus_graph: TypilusGraph) -> Tuple[GraphData[str], List[str]]:
        """Convert a Typilus graph into ``GraphData`` plus target annotations.

        Args:
            typilus_graph: Mapping with the keys ``"edges"``, ``"supernodes"``,
                ``"nodes"`` and ``"token-sequence"``. It is only read; this
                method does not modify it.

        Returns:
            A pair ``(graph, annotations)``. ``graph`` holds the nodes, one
            ``int32`` edge array of shape ``(num_edges, 2)`` per edge type and
            the reference nodes ``"token-sequence"`` and ``"supernodes"``.
            ``annotations[i]`` is the annotation of the i-th entry of the
            ``"supernodes"`` reference list.
        """
        def get_adj_list(adjacency_dict):
            # Flatten {from: [to, ...]} into (from, to) pairs. The source
            # index is converted with int(); presumably the keys are strings
            # because the graph was loaded from JSON -- confirm.
            for from_node_idx, to_node_idxs in adjacency_dict.items():
                from_node_idx = int(from_node_idx)
                for to_idx in to_node_idxs:
                    yield (from_node_idx, to_idx)

        edges = {}
        for edge_type, adj_dict in typilus_graph["edges"].items():
            adj_list: List[Tuple[int, int]] = list(get_adj_list(adj_dict))
            if adj_list:
                edges[edge_type] = np.array(adj_list, dtype=np.int32)
            else:
                # Keep the two-column shape even when there are no edges.
                edges[edge_type] = np.zeros((0, 2), dtype=np.int32)

        supernode_idxs_with_ground_truth: List[int] = []
        supernode_annotations: List[str] = []
        for supernode_idx, supernode_data in typilus_graph["supernodes"].items(
        ):
            annotation = supernode_data["annotation"]
            if annotation in IGNORED_TYPES:
                continue
            if annotation is None:
                if not self.__tensorize_samples_with_no_annotation:
                    continue
                # Placeholder for unannotated supernodes. It is kept in a
                # local variable so the caller's graph is left untouched.
                annotation = "??"
            supernode_idxs_with_ground_truth.append(int(supernode_idx))
            supernode_annotations.append(annotation)

        return (
            GraphData[str](
                node_information=typilus_graph["nodes"],
                edges=edges,
                reference_nodes={
                    "token-sequence": typilus_graph["token-sequence"],
                    "supernodes": supernode_idxs_with_ground_truth,
                },
            ),
            supernode_annotations,
        )
Ejemplo n.º 5
0
 def build_neural_module(self) -> SimpleRegressionNetwork:
     """Create the regression network for this model.

     The stored number of features is passed through ``enforce_not_None``
     and then given to the ``SimpleRegressionNetwork`` constructor.
     """
     num_features = enforce_not_None(self.__num_features)
     return SimpleRegressionNetwork(num_features)
Ejemplo n.º 6
0
 def build_neural_module(self) -> PPIClassification:
     """Create the classification module on top of a freshly built GNN.

     The GNN module is built first by the wrapped GNN model; the stored
     number of target labels is then passed through ``enforce_not_None``.
     """
     gnn_module = self.__gnn_model.build_neural_module()
     num_target_labels = enforce_not_None(self.__num_target_labels)
     return PPIClassification(gnn_module, num_target_labels)
Ejemplo n.º 7
0
 def build_neural_module(self) -> LinearFeatureEmbedder:
     """Create the linear feature embedder for this model.

     The stored number of input features is passed through
     ``enforce_not_None``; the embedding size and the activation are
     forwarded as stored on this instance.
     """
     input_size = enforce_not_None(self.__num_input_features)
     output_size = self.embedding_size
     activation = self.__activation
     return LinearFeatureEmbedder(
         input_element_size=input_size,
         output_embedding_size=output_size,
         activation=activation,
     )