def greedy_decode(self, data: List[CodeGraph2Seq], trained_network: Graph2SeqModule, device: Any) -> List[Tuple[List[str], float]]:
    """Greedily decode an output sequence for every sample in `data`.

    Runs the trained GNN encoder over minibatches of tensorized samples and
    delegates the actual greedy decoding to the decoder model. Returns one
    (token list, score) tuple per input sample, in input order.
    """
    decoded_sequences = []
    # `return_input_data=True` keeps the raw samples alongside the minibatched
    # tensors so the concrete node labels can be recovered below.
    for mb_data, input_data in self.minibatch_iterator(
            self.tensorize_dataset(iter(data), return_input_data=True),
            device,
            max_minibatch_size=50,
    ):
        # One lower-cased label per backbone-sequence node, concatenated across
        # the whole minibatch. NOTE(review): presumably consumed by a copy/
        # pointer mechanism inside __decoder_model — confirm there.
        input_concrete_values: List[str] = []
        for sample in input_data:
            sample = enforce_not_None(sample)
            input_concrete_values.extend(
                sample["node_labels"][k].lower() for k in sample["backbone_sequence"])
        with torch.no_grad():  # inference only; no gradients needed
            gnn_output = trained_network._gnn(
                **mb_data["encoder_mb_data"])  # type: GnnOutput
            mb_outputs = self.__decoder_model.greedy_decode(
                input_concrete_values=input_concrete_values,
                # Node representations restricted to the backbone nodes, plus
                # the graph index each backbone node originated from.
                input_memories=gnn_output.output_node_representations[
                    gnn_output.node_idx_references["backbone_nodes"]],
                input_memories_origin_idx=gnn_output.node_graph_idx_reference["backbone_nodes"],
                initial_states=trained_network._get_initial_decoder_states(
                    gnn_output),
                neural_model=trained_network._decoder,
            )
            decoded_sequences.extend(mb_outputs)
    # Every input sample must yield exactly one decoded sequence.
    assert len(decoded_sequences) == len(data)
    return decoded_sequences
def tensorize(
    self, datapoint: GraphData[TNodeData]
) -> Optional[TensorizedGraphData[TTensorizedNodeData]]:
    """Tensorize a single graph datapoint.

    Returns the tensorized graph, or None (dropping the sample, with a
    warning) when the graph exceeds the configured node or edge limits.
    """
    tensorized_data = TensorizedGraphData(
        adjacency_lists=list(self.__iterate_edge_types(datapoint)),
        node_tensorized_data=[
            enforce_not_None(self.__node_embedding_model.tensorize(ni))
            for ni in datapoint.node_information
        ],
        reference_nodes={
            # A single np.array suffices; the previous double wrap
            # np.array(np.array(...)) made a redundant extra copy.
            n: np.array(refs, dtype=np.int32)
            for n, refs in datapoint.reference_nodes.items()
        },
        num_nodes=len(datapoint.node_information),
    )

    if tensorized_data.num_nodes > self.max_nodes_per_graph:
        # Lazy %-args defer string formatting until the record is emitted.
        self.LOGGER.warning("Dropping graph with %s nodes.",
                            tensorized_data.num_nodes)
        return None

    num_edges = sum(len(adj) for adj in tensorized_data.adjacency_lists)
    if num_edges > self.max_graph_edges:
        self.LOGGER.warning("Dropping graph with %s edges.", num_edges)
        return None

    return tensorized_data
def tensorize(
    self, datapoint: GraphData[TNodeData, TEdgeData]
) -> Optional[TensorizedGraphData[TTensorizedNodeData, TTensorizedEdgeData]]:
    """Tensorize a single graph datapoint, including optional edge features.

    Returns the tensorized graph, or None (dropping the sample, with a
    warning) when the graph exceeds the configured node or edge limits.
    The node limit is checked first, before any tensorization work is done.
    """
    if len(datapoint.node_information) > self.max_nodes_per_graph:
        # Lazy %-args defer string formatting until the record is emitted.
        self.LOGGER.warning("Dropping graph with %s nodes.",
                            len(datapoint.node_information))
        return None

    if self.__edge_embedding_model is None:
        tensorized_edge_features = None
    else:
        # One list per known edge type, in the fixed __edge_idx_to_type order;
        # types with no edges in this graph contribute an empty list.
        tensorized_edge_features = []
        for edge_type in self.__edge_idx_to_type:
            edge_features_for_edge_type = datapoint.edge_features.get(
                edge_type)
            if edge_features_for_edge_type is None:
                # No edges of type `edge_type`
                tensorized_edge_features.append([])
            else:
                tensorized_edge_features.append([
                    self.__edge_embedding_model.tensorize(e)
                    for e in edge_features_for_edge_type
                ])

    tensorized_data = TensorizedGraphData(
        adjacency_lists=list(self.__iterate_edge_types(datapoint)),
        node_tensorized_data=[
            enforce_not_None(self.__node_embedding_model.tensorize(ni))
            for ni in datapoint.node_information
        ],
        edge_features=tensorized_edge_features,
        reference_nodes={
            n: np.array(refs, dtype=np.int32)
            for n, refs in datapoint.reference_nodes.items()
        },
        num_nodes=len(datapoint.node_information),
    )

    num_edges = sum(len(adj) for adj in tensorized_data.adjacency_lists)
    if num_edges > self.max_graph_edges:
        self.LOGGER.warning("Dropping graph with %s edges.", num_edges)
        return None

    return tensorized_data
def __convert(
        self, typilus_graph: TypilusGraph) -> Tuple[GraphData[str], List[str]]:
    """Convert a Typilus graph into a GraphData plus its supernode annotations."""

    def iter_edges(adjacency_dict):
        # Flatten {from_idx: [to_idx, ...]} (keys may be strings) into pairs.
        for src, targets in adjacency_dict.items():
            src = int(src)
            for dst in targets:
                yield (src, dst)

    edges = {}
    for edge_type, adj_dict in typilus_graph["edges"].items():
        pairs: List[Tuple[int, int]] = list(iter_edges(adj_dict))
        # An empty edge type still gets a well-shaped (0, 2) array.
        edges[edge_type] = (
            np.array(pairs, dtype=np.int32)
            if pairs else np.zeros((0, 2), dtype=np.int32)
        )

    annotated_idxs: List[int] = []
    annotations: List[str] = []
    for node_idx, node_data in typilus_graph["supernodes"].items():
        annotation = node_data["annotation"]
        if annotation in IGNORED_TYPES:
            continue
        if annotation is None:
            # Unannotated supernodes are either skipped entirely or kept
            # with the placeholder type "??", depending on configuration.
            if not self.__tensorize_samples_with_no_annotation:
                continue
            node_data["annotation"] = "??"
        annotated_idxs.append(int(node_idx))
        annotations.append(enforce_not_None(node_data["annotation"]))

    graph = GraphData[str](
        node_information=typilus_graph["nodes"],
        edges=edges,
        reference_nodes={
            "token-sequence": typilus_graph["token-sequence"],
            "supernodes": annotated_idxs,
        },
    )
    return graph, annotations
def build_neural_module(self) -> SimpleRegressionNetwork:
    """Construct the regression network sized to this model's feature count."""
    num_features = enforce_not_None(self.__num_features)
    return SimpleRegressionNetwork(num_features)
def build_neural_module(self) -> PPIClassification:
    """Build the GNN backbone and wrap it in a PPI classification head."""
    backbone = self.__gnn_model.build_neural_module()
    num_labels = enforce_not_None(self.__num_target_labels)
    return PPIClassification(backbone, num_labels)
def build_neural_module(self) -> LinearFeatureEmbedder:
    """Construct the linear embedder for this model's input feature size."""
    num_inputs = enforce_not_None(self.__num_input_features)
    embedder = LinearFeatureEmbedder(
        input_element_size=num_inputs,
        output_embedding_size=self.embedding_size,
        activation=self.__activation,
    )
    return embedder