def make_all_py_reader_inputs(input_fields, is_test=False):
    """Create a ``py_reader`` for the requested fields and return its outputs.

    Shapes, dtypes and LoD levels are looked up per field in the
    module-level ``input_descs`` table; a description with no third
    entry defaults to LoD level 0.

    Returns:
        A pair ``(inputs, reader)`` where ``inputs`` is the list of
        variables produced by ``layers.read_file``.
    """
    # Resolve each field's description once instead of re-indexing
    # input_descs in every comprehension.
    descs = [input_descs[field] for field in input_fields]
    reader = layers.py_reader(
        capacity=20,
        name="test_reader" if is_test else "train_reader",
        shapes=[desc[0] for desc in descs],
        dtypes=[desc[1] for desc in descs],
        lod_levels=[desc[2] if len(desc) == 3 else 0 for desc in descs])
    return layers.read_file(reader), reader
def make_all_py_reader_inputs(input_fields, is_test=False):
    """Define the input data layers for the transformer model.

    Builds a double-buffered ``py_reader`` whose shapes, dtypes and LoD
    levels come from the module-level ``input_descs`` table (a missing
    third entry means LoD level 0), and returns ``(inputs, reader)``.
    """
    # Look up every field description once up front.
    field_descs = [input_descs[name] for name in input_fields]
    reader = layers.py_reader(
        capacity=20,
        name="test_reader" if is_test else "train_reader",
        shapes=[d[0] for d in field_descs],
        dtypes=[d[1] for d in field_descs],
        lod_levels=[d[2] if len(d) == 3 else 0 for d in field_descs],
        use_double_buffer=True)
    return layers.read_file(reader), reader
def node2vec_model(graph, hidden_size=16, neg_num=5):
    """Build the node2vec skip-gram model with negative sampling.

    Args:
        graph: Graph object providing ``num_nodes``.
        hidden_size: Dimension of the node embeddings.
        neg_num: Number of negative samples per positive pair.

    Returns:
        A ``(pyreader, loss)`` pair. The reader yields int64 id tensors
        ``src`` [-1, 1, 1], ``pos`` [-1, 1, 1] and ``negs`` [-1, neg_num, 1].
    """
    pyreader = l.py_reader(
        capacity=70,
        shapes=[[-1, 1, 1], [-1, 1, 1], [-1, neg_num, 1]],
        dtypes=['int64', 'int64', 'int64'],
        lod_levels=[0, 0, 0],
        name='train',
        use_double_buffer=True)

    embed_init = fluid.initializer.UniformInitializer(low=-1.0, high=1.0)
    weight_init = fluid.initializer.TruncatedNormal(
        scale=1.0 / math.sqrt(hidden_size))

    src, pos, negs = l.read_file(pyreader)

    embed_src = l.embedding(
        input=src,
        size=[graph.num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(name='content', initializer=embed_init))
    # Positive and negative context lookups share the same 'weight' table.
    weight_pos = l.embedding(
        input=pos,
        size=[graph.num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(name='weight', initializer=weight_init))
    weight_negs = l.embedding(
        input=negs,
        size=[graph.num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(name='weight', initializer=weight_init))

    # Similarity scores: [batch_size, 1, 1] and [batch_size, 1, neg_num].
    pos_logits = l.matmul(embed_src, weight_pos, transpose_y=True)
    neg_logits = l.matmul(embed_src, weight_negs, transpose_y=True)

    # Constant 1/0 targets derived from the logits so shapes always match.
    ones_label = pos_logits * 0. + 1.
    ones_label.stop_gradient = True
    pos_loss = l.sigmoid_cross_entropy_with_logits(pos_logits, ones_label)

    zeros_label = neg_logits * 0.
    zeros_label.stop_gradient = True
    neg_loss = l.sigmoid_cross_entropy_with_logits(neg_logits, zeros_label)

    loss = (l.reduce_mean(pos_loss) + l.reduce_mean(neg_loss)) / 2
    return pyreader, loss
def node_classify_model(graph, num_labels, hidden_size=16, name='node_classify_task'):
    """Build a multi-label node-classification head on frozen embeddings.

    Args:
        graph: Graph object providing ``num_nodes``.
        num_labels: Number of target labels.
        hidden_size: Dimension of the shared embedding table.
        name: Prefix used for the reader's name.

    Returns:
        A ``(pyreader, loss, prob, labels, topk)`` tuple.
    """
    pyreader = l.py_reader(
        capacity=70,
        shapes=[[-1, 1], [-1, num_labels]],
        dtypes=['int64', 'float32'],
        lod_levels=[0, 0],
        name=name + '_pyreader',
        use_double_buffer=True)
    nodes, labels = l.read_file(pyreader)

    # Reuse the 'content' embedding table and keep it frozen: only the
    # classifier weights below are trained.
    embed_nodes = l.embedding(
        input=nodes,
        size=[graph.num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(name='content'))
    embed_nodes.stop_gradient = True

    logits = l.fc(input=embed_nodes, size=num_labels)
    loss = l.reduce_mean(l.sigmoid_cross_entropy_with_logits(logits, labels))
    prob = l.sigmoid(logits)
    # Number of positive labels per example — presumably used by the
    # caller to pick top-k predictions; confirm against the eval loop.
    topk = l.reduce_sum(labels, -1)
    return pyreader, loss, prob, labels, topk
def node_classify_model(graph, num_labels, embed_dim=16, name='node_classify_task'):
    """Build node classify model.

    Args:
        graph: The :code:`Graph` data object.
        num_labels: The number of labels.
        embed_dim: The dimension of embedding.
        name: The name of the model.

    Returns:
        A dict with keys ``pyreader``, ``loss``, ``prob``, ``labels``
        and ``topk``.
    """
    pyreader = l.py_reader(
        capacity=70,
        shapes=[[-1, 1], [-1, num_labels]],
        dtypes=['int64', 'float32'],
        lod_levels=[0, 0],
        name=name + '_pyreader',
        use_double_buffer=True)
    nodes, labels = l.read_file(pyreader)

    # Look up the shared embedding table ('shared_w') and freeze it so
    # only the classification layer is trained.
    embed_nodes = l.embedding(
        input=nodes, size=[graph.num_nodes, embed_dim], param_attr='shared_w')
    embed_nodes.stop_gradient = True

    logits = l.fc(input=embed_nodes, size=num_labels)
    loss = l.reduce_mean(l.sigmoid_cross_entropy_with_logits(logits, labels))
    prob = l.sigmoid(logits)
    # Per-example positive-label count — presumably drives top-k
    # selection downstream; verify against the caller.
    topk = l.reduce_sum(labels, -1)

    return {
        'pyreader': pyreader,
        'loss': loss,
        'prob': prob,
        'labels': labels,
        'topk': topk
    }
def forward(self):
    """Build the skip-gram-with-negative-sampling training graph.

    Reads (src, dsts) id tensors from ``self.pyreader``, embeds both with
    a shared "weight" table (sharded via ``distributed_embedding`` when
    ``self.num_part`` > 1 and not running distributed), scores src against
    the positive + ``self.neg_num`` negative destinations, and stores the
    weighted sigmoid cross-entropy loss on ``self.loss``.

    NOTE(review): assumes dsts packs 1 positive followed by neg_num
    negatives along its second axis — confirm against the data pipeline.
    """
    src, dsts = L.read_file(self.pyreader)
    if self.is_sparse:
        # Sparse lookup requires 2-D id input; restore rank afterwards.
        src = L.reshape(src, [-1, 1])
        dsts = L.reshape(dsts, [-1, 1])
    if self.num_part is not None and self.num_part != 1 and not self.is_distributed:
        # Embedding table split into num_part shards (single-process mode).
        src_embed = distributed_embedding(
            src,
            self.num_nodes,
            self.embed_dim,
            self.embed_init,
            "weight",
            self.num_part,
            self.is_sparse,
            learning_rate=self.embedding_lr)
        dsts_embed = distributed_embedding(
            dsts,
            self.num_nodes,
            self.embed_dim,
            self.embed_init,
            "weight",
            self.num_part,
            self.is_sparse,
            learning_rate=self.embedding_lr)
    else:
        # Single shared "weight" table for both src and dsts lookups.
        src_embed = L.embedding(
            src, (self.num_nodes, self.embed_dim),
            self.is_sparse,
            self.is_distributed,
            param_attr=F.ParamAttr(
                name="weight",
                learning_rate=self.embedding_lr,
                initializer=self.embed_init))
        dsts_embed = L.embedding(
            dsts, (self.num_nodes, self.embed_dim),
            self.is_sparse,
            self.is_distributed,
            param_attr=F.ParamAttr(
                name="weight",
                learning_rate=self.embedding_lr,
                initializer=self.embed_init))
    if self.is_sparse:
        # Undo the 2-D flattening done for the sparse lookup.
        src_embed = L.reshape(src_embed, [-1, 1, self.embed_dim])
        dsts_embed = L.reshape(dsts_embed,
                               [-1, self.neg_num + 1, self.embed_dim])

    logits = L.matmul(
        src_embed, dsts_embed, transpose_y=True)  # [batch_size, 1, neg_num+1]

    # Targets: 1 for the positive column, 0 for the neg_num negatives.
    pos_label = L.fill_constant_batch_size_like(logits, [-1, 1, 1], "float32",
                                                1)
    neg_label = L.fill_constant_batch_size_like(
        logits, [-1, 1, self.neg_num], "float32", 0)
    label = L.concat([pos_label, neg_label], -1)

    # Re-weight so the single positive counts as much as all negatives.
    pos_weight = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                 "float32", self.neg_num)
    neg_weight = L.fill_constant_batch_size_like(
        logits, [-1, 1, self.neg_num], "float32", 1)
    weight = L.concat([pos_weight, neg_weight], -1)

    weight.stop_gradient = True
    label.stop_gradient = True

    loss = L.sigmoid_cross_entropy_with_logits(logits, label)
    loss = loss * weight
    loss = L.reduce_mean(loss)
    # Normalize for the extra weighting so loss scale is comparable.
    loss = loss * ((self.neg_num + 1) / 2 / self.neg_num)
    loss.persistable = True
    self.loss = loss
    return loss
def forward(self):
    """Build the feature-weighted skip-gram training graph.

    Each node has ``self.num_featuers`` feature embeddings; a learned,
    per-node softmax over the "alpha" table mixes them into one vector
    before src/dst scoring. The weighted sigmoid cross-entropy loss is
    stored on ``self.loss``.

    NOTE(review): ``num_featuers`` is a pre-existing misspelling of the
    attribute set elsewhere on this class — do not rename here alone.
    """
    src, dst = L.read_file(self.pyreader)
    # Keep the raw node ids (first feature column) for the "alpha"
    # mixing-weight lookup; 2**30-1 acts as an unbounded slice end.
    src_id = L.slice(src, [0, 1, 2, 3], [0, 0, 0, 0],
                     [int(math.pow(2, 30)) - 1, 1, 1, 1])
    dst_id = L.slice(dst, [0, 1, 2, 3], [0, 0, 0, 0],
                     [int(math.pow(2, 30)) - 1, self.neg_num + 1, 1, 1])

    if self.is_sparse:
        # sparse mode use 2 dims input.
        src = L.reshape(src, [-1, 1])
        dst = L.reshape(dst, [-1, 1])

    # [b, 1, f, h]
    src_embed = split_embedding(src, self.num_nodes, self.hidden_size,
                                self.embed_init, "weight", self.num_part,
                                self.is_sparse)

    # [b, n+1, f, h]
    dst_embed = split_embedding(dst, self.num_nodes, self.hidden_size,
                                self.embed_init, "weight", self.num_part,
                                self.is_sparse)

    if self.is_sparse:
        # Restore the 4-D layout flattened for the sparse lookup.
        src_embed = L.reshape(src_embed,
                              [-1, 1, self.num_featuers, self.hidden_size])
        dst_embed = L.reshape(
            dst_embed,
            [-1, self.neg_num + 1, self.num_featuers, self.hidden_size])

    # Per-node softmax mixing weights over the feature embeddings.
    # [b, 1, 1, f]
    src_weight = L.softmax(
        L.embedding(src_id, [self.num_nodes, self.num_featuers],
                    param_attr=F.ParamAttr(name="alpha")))
    # [b, n+1, 1, f]
    dst_weight = L.softmax(
        L.embedding(dst_id, [self.num_nodes, self.num_featuers],
                    param_attr=F.ParamAttr(name="alpha")))

    # Weighted sum of feature embeddings, then drop the singleton axis.
    # [b, 1, h]
    src_sum = L.squeeze(L.matmul(src_weight, src_embed), axes=[2])
    # [b, n+1, h]
    dst_sum = L.squeeze(L.matmul(dst_weight, dst_embed), axes=[2])

    logits = L.matmul(
        src_sum, dst_sum, transpose_y=True)  # [batch_size, 1, neg_num+1]

    # Targets: 1 for the positive column, 0 for the neg_num negatives.
    pos_label = L.fill_constant_batch_size_like(logits, [-1, 1, 1], "float32",
                                                1)
    neg_label = L.fill_constant_batch_size_like(
        logits, [-1, 1, self.neg_num], "float32", 0)
    label = L.concat([pos_label, neg_label], -1)

    # Re-weight so the single positive counts as much as all negatives.
    pos_weight = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                 "float32", self.neg_num)
    neg_weight = L.fill_constant_batch_size_like(
        logits, [-1, 1, self.neg_num], "float32", 1)
    weight = L.concat([pos_weight, neg_weight], -1)

    weight.stop_gradient = True
    label.stop_gradient = True

    loss = L.sigmoid_cross_entropy_with_logits(logits, label)
    loss = loss * weight
    loss = L.reduce_mean(loss)
    # Normalize for the extra weighting so loss scale is comparable.
    loss = loss * ((self.neg_num + 1) / 2 / self.neg_num)
    loss.persistable = True
    self.loss = loss
    return loss