def forward(self, is_test=False): """ Build the network. """ substruct_graph_wrapper = GraphWrapper( name="graph", node_feat=[('atom_type', [None, 1], "int64"), ('chirality_tag', [None, 1], "int64")], edge_feat=[('bond_type', [None, 1], "int64"), ('bond_direction', [None, 1], "int64")]) context_graph_wrapper = GraphWrapper( name="context_graph", node_feat=[('atom_type', [None, 1], "int64"), ('chirality_tag', [None, 1], "int64")], edge_feat=[('bond_type', [None, 1], "int64"), ('bond_direction', [None, 1], "int64")]) substruct_center_idx = layers.data(name="substruct_center_idx", shape=[-1, 1], dtype="int64") context_overlap_idx = layers.data(name="context_overlap_idx", shape=[-1, 1], dtype="int64") context_overlap_lod = layers.data(name="context_overlap_lod", shape=[1, -1], dtype="int32") context_cycle_index = layers.data(name="context_cycle_index", shape=[-1, 1], dtype="int64") substruct_node_repr = self.substruct_model.forward( substruct_graph_wrapper, is_test=is_test) substruct_repr = layers.gather(substruct_node_repr, substruct_center_idx) context_node_repr = self.context_model.forward(context_graph_wrapper, is_test=is_test) context_overlap_repr = layers.gather(context_node_repr, context_overlap_idx) context_repr = layers.sequence_pool( layers.lod_reset(context_overlap_repr, context_overlap_lod), self.context_pooling) neg_context_repr = layers.gather(context_repr, context_cycle_index) pred_pos = layers.reduce_sum(substruct_repr * context_repr, 1) pred_neg = layers.reduce_sum(substruct_repr * neg_context_repr, 1) label_pos = pred_pos * 0.0 + 1.0 label_pos.stop_gradient = True label_neg = pred_neg * 0.0 label_neg.stop_gradient = True loss = layers.sigmoid_cross_entropy_with_logits(x=pred_pos, label=label_pos) \ + layers.sigmoid_cross_entropy_with_logits(x=pred_neg, label=label_neg) loss = layers.reduce_mean(loss) self.substruct_graph_wrapper = substruct_graph_wrapper self.context_graph_wrapper = context_graph_wrapper self.loss = loss
def create_model(args, config, graph_label):
    """Create model for given model configuration."""
    logging.info('building model')

    graph_wrapper = GraphWrapper(
        name="graph",
        node_feat=[
            ('atom_type', [None, 1], "int64"),
            ('chirality_tag', [None, 1], "int64")],
        edge_feat=[
            ('bond_type', [None, 1], "int64"),
            ('bond_direction', [None, 1], "int64")])

    encoder = GINEncoder(config)
    global_repr, patch_summary = encoder.forward(graph_wrapper)

    hid = L.fc(global_repr, config['hidden_size'], act='relu', name='finetune_fc1')
    hid = L.fc(hid, config['hidden_size'], act='relu', name='finetune_fc2')
    logits = L.fc(hid, args.num_tasks, name="finetune_fc3")

    loss = L.sigmoid_cross_entropy_with_logits(x=logits, label=graph_label)
    loss = L.reduce_mean(loss)
    pred = L.sigmoid(logits)

    keys = ['loss', 'graph_wrapper', 'encoder', 'graph_emb', 'pred']
    Agent = namedtuple('Agent', keys)
    return Agent(
        loss=loss,
        graph_wrapper=graph_wrapper,
        encoder=encoder,
        graph_emb=global_repr,
        pred=pred)
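# A minimal usage sketch under stated assumptions: `args` and `config` come from
# the training script, `args.lr` is an assumed learning-rate field, and
# `batch_graph` / `labels` come from the data pipeline. It only shows how the
# returned Agent namedtuple is typically consumed.
import paddle.fluid as fluid
import paddle.fluid.layers as L

train_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(train_prog, startup_prog):
    graph_label = L.data(name="graph_label", shape=[None, args.num_tasks],
                         dtype="float32", append_batch_size=False)
    agent = create_model(args, config, graph_label)
    fluid.optimizer.Adam(learning_rate=args.lr).minimize(agent.loss)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_prog)

feed_dict = agent.graph_wrapper.to_feed(batch_graph)  # GraphWrapper.to_feed is PGL API
feed_dict["graph_label"] = labels                      # float32, [batch_size, num_tasks]
loss_val, = exe.run(train_prog, feed=feed_dict, fetch_list=[agent.loss])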
def __init__(self, model_config, use_pretrained_compound_gnns=False):
    self.model_config = model_config
    self.use_pretrained_compound_gnns = use_pretrained_compound_gnns

    dim = CompoundConstants.atomic_numeric_feat_dim
    self.compound_graph_wrapper = GraphWrapper(
            name="compound_graph",
            node_feat=[
                ('atom_type', [None, 1], "int64"),
                ('chirality_tag', [None, 1], "int64"),
                ('atom_numeric_feat', [None, dim], "float32")],
            edge_feat=[
                ('bond_type', [None, 1], "int64"),
                ('bond_direction', [None, 1], "int64")])

    protein_token = fluid.layers.data(
            name='protein_token', shape=[None, 1], dtype='int64')
    protein_token_lod = fluid.layers.data(
            name='protein_token_lod', shape=[None], dtype='int32')
    self.protein_token = fluid.layers.lod_reset(protein_token, y=protein_token_lod)

    if use_pretrained_compound_gnns:
        self.compound_model = PretrainGNNModel(
                model_config['compound'],
                name='gnn')  # TODO: update the name to 'compound'
    else:
        self.compound_model = CompoundGNNModel(
                model_config['compound'], name='compound')
    self.protein_model = ProteinSequenceModel(
            model_config['protein'], name='protein')
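# A minimal feed sketch (an assumption, not repo code) for the protein inputs
# above: the token ids of all proteins in a batch are concatenated into one
# column and `protein_token_lod` carries the sequence offsets used by `lod_reset`.
import numpy as np

seqs = [[5, 8, 3], [7, 7, 2, 9]]  # illustrative token id sequences
protein_token = np.concatenate(seqs).reshape(-1, 1).astype("int64")
protein_token_lod = np.cumsum([0] + [len(s) for s in seqs]).astype("int32")
# protein_token_lod -> [0, 3, 7]; feed both arrays under the data names above.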
def forward(self, is_test=False): """ Build the network. """ graph_wrapper = GraphWrapper(name="graph", node_feat=[ ('atom_type', [None, 1], "int64"), ('chirality_tag', [None, 1], "int64")], edge_feat=[ ('bond_type', [None, 1], "int64"), ('bond_direction', [None, 1], "int64")]) masked_node_indice = layers.data(name="masked_node_indice", shape=[-1, 1], dtype="int64") masked_node_label = layers.data(name="masked_node_label", shape=[-1, 1], dtype="int64") node_repr = self.gnn_model.forward(graph_wrapper, is_test=is_test) masked_node_repr = layers.gather(node_repr, masked_node_indice) logits = layers.fc(masked_node_repr, size=len(CompoundConstants.atom_num_list), name="masked_node_logits") loss, pred = layers.softmax_with_cross_entropy( logits, masked_node_label, return_softmax=True) loss = layers.reduce_mean(loss) acc = layers.accuracy(pred, masked_node_label) self.graph_wrapper = graph_wrapper self.loss = loss
def forward(self, is_test=False): """tbd""" graph_wrapper = GraphWrapper(name="graph", node_feat=[ ('atom_type', [None, 1], "int64"), ('chirality_tag', [None, 1], "int64") ], edge_feat=[ ('bond_type', [None, 1], "int64"), ('bond_direction', [None, 1], "int64") ]) supervised_label = layers.data(name="supervised_label", shape=[None, self.task_num], dtype="float32") valid = layers.data("valid", shape=[None, self.task_num], dtype="float32") node_repr = self.gnn_model.forward(graph_wrapper, is_test=is_test) graph_repr = pgl.layers.graph_pooling(graph_wrapper, node_repr, self.pool_type) logits = layers.fc(graph_repr, size=self.task_num, name="pretrain_supervised_fc") loss = layers.sigmoid_cross_entropy_with_logits(x=logits, label=supervised_label) loss = layers.reduce_sum(loss * valid) / layers.reduce_sum(valid) self.graph_wrapper = graph_wrapper self.loss = loss
def create_model(args, config):
    """Create model for given model configuration."""
    logging.info('building model')

    graph_wrapper = GraphWrapper(
        name="graph",
        node_feat=[
            ('atom_type', [None, 1], "int64"),
            ('chirality_tag', [None, 1], "int64")],
        edge_feat=[
            ('bond_type', [None, 1], "int64"),
            ('bond_direction', [None, 1], "int64")])

    # NOTE: [num_nodes, num_graphs], bs = num_graphs
    pos_mask = L.data(name='pos_mask', shape=[-1, args.batch_size], dtype='float32')
    neg_mask = L.data(name='neg_mask', shape=[-1, args.batch_size], dtype='float32')

    encoder = GINEncoder(config)
    global_repr, patch_summary = encoder.forward(graph_wrapper)

    # Project the graph-level and node-level (patch) representations before
    # estimating the local-global mutual information.
    global_D = FF(encoder.embedding_dim)
    local_D = FF(encoder.embedding_dim)
    g_enc = global_D.forward(global_repr)
    l_enc = local_D.forward(patch_summary)

    # Pairwise scores between every node and every graph in the batch.
    res = L.matmul(l_enc, g_enc, transpose_y=True)

    E_pos = get_positive_expectation(res * pos_mask, config['measure'], average=False)
    E_pos = L.reduce_sum(E_pos) / graph_wrapper.num_nodes
    E_neg = get_negative_expectation(res * neg_mask, config['measure'], average=False)
    E_neg = L.reduce_sum(E_neg) / (graph_wrapper.num_nodes * (graph_wrapper.num_graph - 1))
    local_global_loss = E_neg - E_pos

    if config['prior']:
        prior_D = PriorDiscriminator(encoder.embedding_dim)
        prior = L.uniform_random([args.batch_size, encoder.embedding_dim], min=0.0, max=1.0)
        term_1 = L.reduce_mean(L.log(prior_D.forward(prior)))
        term_2 = L.reduce_mean(L.log(1.0 - prior_D.forward(global_repr)))
        prior_loss = -(term_1 + term_2) * config['gamma']
    else:
        prior_loss = 0

    total_loss = local_global_loss + prior_loss

    keys = ['loss', 'graph_wrapper', 'encoder', 'graph_emb']
    Agent = namedtuple('Agent', keys)
    return Agent(
        loss=total_loss,
        graph_wrapper=graph_wrapper,
        encoder=encoder,
        graph_emb=global_repr)
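# A minimal sketch (not part of the model file) of how the `pos_mask` / `neg_mask`
# feeds could be built for a batch, consistent with the normalization above
# (num_nodes positive pairs, num_nodes * (num_graphs - 1) negative pairs). The
# helper name `build_masks` and the argument `graph_lens` (number of nodes per
# graph in the batch) are hypothetical.
import numpy as np

def build_masks(graph_lens):
    """pos_mask[i, j] = 1 if node i belongs to graph j; neg_mask is the complement."""
    num_nodes = int(np.sum(graph_lens))
    num_graphs = len(graph_lens)
    pos_mask = np.zeros([num_nodes, num_graphs], dtype="float32")
    start = 0
    for j, n in enumerate(graph_lens):
        pos_mask[start:start + n, j] = 1.0
        start += n
    neg_mask = 1.0 - pos_mask
    return pos_mask, neg_mask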
def __init__(self, args, graph_wrapper=None):
    self.hidden_size = args.hidden_size
    self.num_nodes = args.num_nodes
    self.drop_rate = args.drop_rate

    node_feature = [('feat', [None, 128], "float32")]
    if graph_wrapper is None:
        self.graph_wrapper = GraphWrapper(
            name="graph",
            place=F.CPUPlace(),
            node_feat=node_feature)
    else:
        self.graph_wrapper = graph_wrapper

    self.build_model(args)
def forward(self, is_test=False): """ Define the forward function,set the parameter layer options. Graph wrapper creates a graph data holders that attributes and features in the graph are :code:`fluid.layers.data`.And we provide interface : code:`to_feed` to help converting :code:`Graph`data into :code:`feed_dict`. Args: name: The graph data prefix,here is graph node_feat: A list of tuples that decribe the details of node feature tenosr. Each tuple must be (name, shape, dtype) and the first dimension of the shape must be set unknown (-1 or None) or we can easily use :code:`Graph.node_feat_info()` to get the node_feat settings. edge_feat: A list of tuples that decribe the details of edge feature tenosr. Each tuple mush be (name, shape, dtype) and the first dimension of the shape must be set unknown (-1 or None) or we can easily use :code:`Graph.edge_feat_info()` to get the edge_feat settings. """ graph_wrapper = GraphWrapper(name="graph", node_feat=[ ('atom_type', [None, 1], "int64"), ('chirality_tag', [None, 1], "int64") ], edge_feat=[ ('bond_type', [None, 1], "int64"), ('bond_direction', [None, 1], "int64") ]) finetune_label = layers.data(name="finetune_label", shape=[None, self.num_tasks], dtype="float32") valid = layers.data("valid", shape=[None, self.num_tasks], dtype="float32") node_repr = self.gnn_model.forward(graph_wrapper, is_test=is_test) graph_repr = pgl.layers.graph_pooling(graph_wrapper, node_repr, self.pool_type) logits = layers.fc(graph_repr, size=self.num_tasks, name="finetune_fc") loss = layers.sigmoid_cross_entropy_with_logits(x=logits, label=finetune_label) loss = layers.reduce_sum(loss * valid) / layers.reduce_sum(valid) pred = layers.sigmoid(logits) self.graph_wrapper = graph_wrapper self.loss = loss self.pred = pred self.finetune_label = finetune_label
def __init__(self, args, dataset):
    self.args = args
    self.dataset = dataset
    self.hidden_size = args.hidden_size
    self.num_classes = args.num_classes
    self.num_features = args.num_features
    self.pooling_ratio = args.pooling_ratio
    self.dropout_ratio = args.dropout_ratio
    self.batch_size = args.batch_size

    # Batch two sample graphs to infer the node feature settings
    # (including the degree-based "norm" feature) for the graph wrapper.
    graph_data = []
    g, label = self.dataset[0]
    graph_data.append(g)
    g, label = self.dataset[1]
    graph_data.append(g)
    batch_graph = MultiGraph(graph_data)
    indegree = batch_graph.indegree()
    norm = np.zeros_like(indegree, dtype="float32")
    norm[indegree > 0] = np.power(indegree[indegree > 0], -0.5)
    batch_graph.node_feat["norm"] = np.expand_dims(norm, -1)
    graph_data = batch_graph

    self.graph_wrapper = GraphWrapper(
        name="graph", node_feat=graph_data.node_feat_info())
    self.labels = L.data(
        "labels",
        shape=[None, self.args.num_classes],
        dtype="int32",
        append_batch_size=False)
    self.labels_1dim = L.data(
        "labels_1dim", shape=[None], dtype="int32", append_batch_size=False)
    self.graph_id = L.data(
        "graph_id", shape=[None], dtype="int32", append_batch_size=False)

    if self.args.dataset_name == "FRANKENSTEIN":
        self.gcn = gcn
    else:
        self.gcn = norm_gcn

    self.build_model()
def __init__(self, name, edge_types, node_feat={}, edge_feat={}, **kwargs):
    self.__data_name_prefix = name
    self._edge_types = edge_types
    self._multi_gw = {}
    # Build one GraphWrapper per edge type. Node features are shared across
    # edge types, while edge features are looked up per edge type.
    for edge_type in self._edge_types:
        type_name = self.__data_name_prefix + '/' + edge_type
        if node_feat:
            n_feat = node_feat
        else:
            n_feat = {}
        if edge_feat:
            e_feat = edge_feat[edge_type]
        else:
            e_feat = {}

        self._multi_gw[edge_type] = GraphWrapper(
            name=type_name,
            node_feat=n_feat,
            edge_feat=e_feat)
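# A minimal instantiation sketch; the class name below stands for whatever class
# owns this __init__ (the pattern matches PGL's heterogeneous graph wrapper), and
# the feature settings are illustrative, not taken from the repo. Node features
# are shared by all edge types; edge features are supplied per edge type.
hgw = HeterGraphWrapper(
    name="hetero_graph",
    edge_types=["click", "buy"],
    node_feat=[('feat', [None, 16], "float32")],
    edge_feat={
        "click": [('weight', [None, 1], "float32")],
        "buy": [('weight', [None, 1], "float32")],
    })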
def __init__(self, args, dataset):
    self.args = args
    self.dataset = dataset
    self.hidden_size = self.args.hidden_size
    self.embed_dim = self.args.embed_dim
    self.dropout_prob = self.args.dropout_rate
    self.pool_type = self.args.pool_type
    self._init_vars = []

    graph_data = []
    g, label = self.dataset[0]
    graph_data.append(g)
    g, label = self.dataset[1]
    graph_data.append(g)

    batch_graph = pgl.graph.MultiGraph(graph_data)
    graph_data = batch_graph
    graph_data.edge_feat["feat"] = graph_data.edge_feat["feat"].astype("int64")
    graph_data.node_feat["feat"] = graph_data.node_feat["feat"].astype("int64")

    self.graph_wrapper = GraphWrapper(
        name="graph",
        place=F.CPUPlace(),
        node_feat=graph_data.node_feat_info(),
        edge_feat=graph_data.edge_feat_info())

    self.atom_encoder = AtomEncoder(name="atom", emb_dim=self.embed_dim)
    self.bond_encoder = BondEncoder(name="bond", emb_dim=self.embed_dim)

    self.labels = L.data(
        "labels",
        shape=[None, self.args.num_class],
        dtype="float32",
        append_batch_size=False)
    self.unmask = L.data(
        "unmask",
        shape=[None, self.args.num_class],
        dtype="float32",
        append_batch_size=False)

    self.build_model()
def forward(self, is_test=False): """ Define the forward function,set the parameter layer options. Graph wrapper creates a graph data holders that attributes and features in the graph are :code:`fluid.layers.data`.And we provide interface : code:`to_feed` to help converting :code:`Graph`data into :code:`feed_dict`. Args: name: The graph data prefix,here is graph node_feat: A list of tuples that decribe the details of node feature tenosr. Each tuple must be (name, shape, dtype) and the first dimension of the shape must be set unknown (-1 or None) or we can easily use :code:`Graph.node_feat_info()` to get the node_feat settings. edge_feat: A list of tuples that decribe the details of edge feature tenosr. Each tuple mush be (name, shape, dtype) and the first dimension of the shape must be set unknown (-1 or None) or we can easily use :code:`Graph.edge_feat_info()` to get the edge_feat settings. Returns: logits: the model prediction. """ graph_wrapper = GraphWrapper(name="graph", node_feat=[ ('atom_type', [None, 1], "int64"), ('chirality_tag', [None, 1], "int64") ], edge_feat=[ ('bond_type', [None, 1], "int64"), ('bond_direction', [None, 1], "int64") ]) node_repr = self.gnn_model.forward(graph_wrapper, is_test=is_test) graph_repr = pgl.layers.graph_pooling(graph_wrapper, node_repr, self.pool_type) logits = layers.fc(graph_repr, size=self.num_tasks, name="finetune_fc") return graph_wrapper, logits
def __init__(self, args, task):
    candi_tasks = [
        "predict_query", "predict_poi", "pointwise",
        "pairwise", "listwise", "listwise_hinge"]
    if task not in candi_tasks:
        raise ValueError("task %s not in %s" % (task, candi_tasks))

    self.norm_score = args.norm_score
    self.ernie_config = ErnieConfig(args.ernie_config_path)
    self.ernie_config.print_config()
    self.city_size = 20000
    self.hidden_size = 64
    self._holder_list = []

    node_feature = [
        ('src_ids', [None, args.max_seq_len], "int64"),
        ('pos_ids', [None, args.max_seq_len], "int64"),
        ('sent_ids', [None, args.max_seq_len], "int64"),
        ('input_mask', [None, args.max_seq_len], "float32"),
        ('node_types', [None], "int32"),
    ]

    if task != 'predict_query':
        self.graph_wrapper = GraphWrapper(
            name="graph", place=F.CPUPlace(), node_feat=node_feature)
        self._holder_list.extend(self.graph_wrapper.holder_list)
    else:
        # This is for save_inference_model for query.
        self.graph_wrapper = FakeGraphWrapper(node_feat=node_feature)
        self._holder_list.extend(self.graph_wrapper.holder_list)

    self.build_model(args, task)