def build(self):
    samples1, support_sizes1 = self.sample(self.inputs1, self.layer_infos)
    num_samples = [layer_info.num_samples for layer_info in self.layer_infos]
    self.outputs1, self.aggregators = self.aggregate(samples1, [self.features], self.dims,
                                                     num_samples, support_sizes1,
                                                     concat=self.concat, model_size=self.model_size)
    dim_mult = 2 if self.concat else 1
    self.outputs1 = layers.Dense(dim_mult * self.dims[-1], dim_mult * self.dims[-1],
                                 dropout=self.placeholders['dropout'],
                                 act=tf.nn.relu)(self.outputs1)
    self.outputs1 = layers.Dense(dim_mult * self.dims[-1], dim_mult * self.dims[-1],
                                 dropout=self.placeholders['dropout'],
                                 act=lambda x: x, bias=False)(self.outputs1)
    self.outputs1 = tf.nn.l2_normalize(self.outputs1, 1)

    self.node_pred = layers.Dense(dim_mult * self.dims[-1], self.num_classes,
                                  dropout=self.placeholders['dropout'], act=lambda x: x)

    # TF graph management
    self.node_preds = self.node_pred(self.outputs1)

    self._loss()
    grads_and_vars = self.optimizer.compute_gradients(self.loss)
    clipped_grads_and_vars = [(tf.clip_by_value(grad, -5.0, 5.0) if grad is not None else None, var)
                              for grad, var in grads_and_vars]
    self.grad, _ = clipped_grads_and_vars[0]
    self.opt_op = self.optimizer.apply_gradients(clipped_grads_and_vars)
    self.preds = self.predict()
def _build(self):
    self.layers.append(layers.Dense(input_dim=self.input_dim,
                                    output_dim=self.dims[1],
                                    act=tf.nn.relu,
                                    dropout=self.placeholders['dropout'],
                                    sparse_inputs=False,
                                    logging=self.logging))
    self.layers.append(layers.Dense(input_dim=self.dims[1],
                                    output_dim=self.output_dim,
                                    act=lambda x: x,
                                    dropout=self.placeholders['dropout'],
                                    logging=self.logging))
def build(self):
    # Sample, obtaining the samples and the support-set sizes.
    samples1, support_sizes1 = self.sample(self.inputs1, self.layer_infos)
    num_samples = [layer_info.num_samples for layer_info in self.layer_infos]
    # Instantiate the aggregators and aggregate.
    self.outputs1, self.aggregators = self.aggregate(samples1, [self.features], self.dims,
                                                     num_samples, support_sizes1,
                                                     concat=self.concat, model_size=self.model_size)
    dim_mult = 2 if self.concat else 1
    self.outputs1 = tf.nn.l2_normalize(self.outputs1, 1)

    dim_mult = 2 if self.concat else 1
    # Fully connected layer for prediction.
    self.node_pred = layers.Dense(dim_mult * self.dims[-1], self.num_classes,
                                  dropout=self.placeholders['dropout'], act=lambda x: x)

    # TF graph management
    self.node_preds = self.node_pred(self.outputs1)

    self._loss()
    # Compute the gradients.
    grads_and_vars = self.optimizer.compute_gradients(self.loss)
    # Collect and clip the gradients.
    clipped_grads_and_vars = [(tf.clip_by_value(grad, -5.0, 5.0) if grad is not None else None, var)
                              for grad, var in grads_and_vars]
    self.grad, _ = clipped_grads_and_vars[0]
    # Apply the gradients to update the parameters.
    self.opt_op = self.optimizer.apply_gradients(clipped_grads_and_vars)
    # Sigmoid is used when nodes can carry multiple labels; softmax when each node has a single label.
    self.preds = self.predict()
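# ---------------------------------------------------------------------------
# Hedged sketch (not part of the repository code): it only illustrates the
# comment above -- a sigmoid output and loss fit multi-label targets, a
# softmax output and loss fit mutually exclusive single-label targets.
# `node_preds`, `labels`, and `sigmoid_loss` are hypothetical names; the real
# model computes the loss in _loss() and the probabilities in predict().
# ---------------------------------------------------------------------------
import tensorflow as tf

def prediction_and_loss(node_preds, labels, sigmoid_loss=True):
    """node_preds: [batch, num_classes] logits; labels: same shape, {0, 1} entries."""
    if sigmoid_loss:
        # Multi-label: each class is an independent Bernoulli.
        preds = tf.nn.sigmoid(node_preds)
        loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=node_preds))
    else:
        # Single-label: classes are mutually exclusive, labels are one-hot.
        preds = tf.nn.softmax(node_preds)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=node_preds))
    return preds, loss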
def __init__(self, input_dim, num_classes, layers_info, **kwargs):
    '''
    Args:
        - input_dim: dimension of the input embeddings.
        - num_classes: number of classes (dimension of the output).
        - layers_info: List of ClassifierInfo namedtuples that describe the parameters
              of all the classifier layers.
    '''
    self.layers = []

    # Create hidden layers
    last_layer_dim = input_dim
    for dim, dropout, act in layers_info:
        layer = layers.Dense(last_layer_dim, dim, dropout=dropout, act=act)
        last_layer_dim = dim
        self.layers.append(layer)
    output_layer = layers.Dense(last_layer_dim, num_classes, act=lambda x: x)
    self.layers.append(output_layer)
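# ---------------------------------------------------------------------------
# Hedged usage sketch: ClassifierInfo is not defined in this snippet, so the
# field names below (dim, dropout, act) are only inferred from the tuple
# unpacking in __init__ above; the class name NodeClassifier and the chained
# forward pass are hypothetical illustrations, not repository API.
# ---------------------------------------------------------------------------
from collections import namedtuple
import tensorflow as tf

ClassifierInfo = namedtuple("ClassifierInfo", ["dim", "dropout", "act"])

layers_info = [
    ClassifierInfo(dim=256, dropout=0.5, act=tf.nn.relu),
    ClassifierInfo(dim=128, dropout=0.5, act=tf.nn.relu),
]
# classifier = NodeClassifier(input_dim=512, num_classes=121, layers_info=layers_info)
#
# A forward pass would simply chain the stacked Dense layers:
#     hidden = inputs
#     for layer in classifier.layers:
#         hidden = layer(hidden)   # the last layer returns the class logits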
def build(self):
    # samples1, support_sizes1 = self.sample(self.inputs1, self.layer_infos)
    # No need to sample here: the pre-sampled neighbors (self.hop1, self.hop2) are used directly.
    batch_size = self.batch_size
    samples = [self.inputs1]
    # size of convolution support at each layer per node
    support_size = 1
    support_sizes = [support_size]
    layer_infos = self.layer_infos
    for k in range(len(layer_infos)):
        t = len(layer_infos) - k - 1
        support_size *= layer_infos[t].num_samples
        if k == 0:
            node = self.hop1
        else:
            assert k == 1
            node = self.hop2
        samples.append(node)
        # samples.append(tf.reshape(node, [support_size * batch_size, ]))
        support_sizes.append(support_size)
    # print(samples)
    samples1, support_sizes1 = samples, support_sizes

    num_samples = [layer_info.num_samples for layer_info in self.layer_infos]
    # num_samples.insert(0, 1)
    self.outputs1, self.aggregators = self.aggregate(samples1, [self.features], self.dims,
                                                     num_samples, support_sizes1,
                                                     concat=self.concat, model_size=self.model_size)
    dim_mult = 2 if self.concat else 1
    self.outputs1 = tf.nn.l2_normalize(self.outputs1, 1)

    dim_mult = 2 if self.concat else 1
    self.node_pred = layers.Dense(dim_mult * self.dims[-1], self.num_classes,
                                  dropout=self.placeholders['dropout'], act=lambda x: x)

    # TF graph management
    self.node_preds = self.node_pred(self.outputs1)

    self._loss()
    grads_and_vars = self.optimizer.compute_gradients(self.loss)
    clipped_grads_and_vars = [(tf.clip_by_value(grad, -5.0, 5.0) if grad is not None else None, var)
                              for grad, var in grads_and_vars]
    self.grad, _ = clipped_grads_and_vars[0]
    self.opt_op = self.optimizer.apply_gradients(clipped_grads_and_vars)
    self.preds = self.predict()
def build(self):
    # samples1 is flattened, but it can be reshaped back using the support sizes.
    # Unlike computing exact 1-hop/2-hop neighborhoods, the neighbors here are randomly
    # sampled, so the neighborhood size can be fixed. Two benefits:
    # 1. every node has the same number of neighbors, which simplifies the computation;
    # 2. the neighbor count is bounded, avoiding the huge memory cost caused by hub nodes.
    samples1, support_sizes1 = self.sample(self.inputs1, self.layer_infos)
    num_samples = [layer_info.num_samples for layer_info in self.layer_infos]
    self.outputs1, self.aggregators = self.aggregate(samples1, [self.features], self.dims,
                                                     num_samples, support_sizes1,
                                                     concat=self.concat, model_size=self.model_size)
    dim_mult = 2 if self.concat else 1
    # The last aggregation layer has no ReLU, so L2-normalize its output.
    self.outputs1 = tf.nn.l2_normalize(self.outputs1, 1)

    dim_mult = 2 if self.concat else 1
    # Finally, a dense layer for prediction.
    self.node_pred = layers.Dense(dim_mult * self.dims[-1], self.num_classes,
                                  dropout=self.placeholders['dropout'], act=lambda x: x)

    # TF graph management
    # Compute the class predictions.
    self.node_preds = self.node_pred(self.outputs1)

    self._loss()
    grads_and_vars = self.optimizer.compute_gradients(self.loss)
    # Gradient clipping, to prevent exploding gradients.
    clipped_grads_and_vars = [(tf.clip_by_value(grad, -5.0, 5.0) if grad is not None else None, var)
                              for grad, var in grads_and_vars]
    self.grad, _ = clipped_grads_and_vars[0]
    self.opt_op = self.optimizer.apply_gradients(clipped_grads_and_vars)
    # self.node_preds are logits, so predict() applies a softmax here; note that the loss
    # computation already applies a softmax internally.
    self.preds = self.predict()
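# ---------------------------------------------------------------------------
# Hedged sketch of the idea described in the comments above (not the repo's
# actual neighbor sampler): every node gets exactly num_samples neighbors by
# sampling with replacement, so all neighborhoods have equal length and hub
# nodes cannot blow up memory. `adj_list` is a hypothetical dict mapping
# node_id -> list of neighbor ids.
# ---------------------------------------------------------------------------
import numpy as np

def sample_fixed_neighbors(adj_list, nodes, num_samples, rng=None):
    """Return an array of shape [len(nodes), num_samples] of sampled neighbor ids."""
    rng = rng or np.random.default_rng()
    sampled = np.empty((len(nodes), num_samples), dtype=np.int64)
    for i, node in enumerate(nodes):
        neighbors = adj_list[node]
        # Sampling with replacement keeps the output length fixed even when a
        # node has fewer than num_samples neighbors.
        sampled[i] = rng.choice(neighbors, size=num_samples, replace=True)
    return sampled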
def build(self):
    #####################
    # [z]: SAMPLING     #
    #      for all layers
    #####################
    # [z]: samples1: [array of 512, array of 5120, array of 128000]
    # [z]: should get the adj matrix connecting the two layers
    """ Build the sample graph with adj info in self.sample() """
    samples1, support_sizes1 = self.sample(self.inputs1, self.layer_infos)  # [z]: check neigh_sampler.py
    z.debug_vars['supervised_models/build/samples1'] = samples1

    # [z]: num_samples = [25, 10]
    num_samples = [layer_info.num_samples for layer_info in self.layer_infos]

    # [z]: self.aggregate is defined in the superclass
    #####################
    # [z]: FORWARD PROP #
    #####################
    # [z]: self.features holds the input features for each node (a length-50 vector)
    # [z]: self.dims holds the number of input features for each conv layer
    self.outputs1, self.aggregators = self.aggregate(samples1, [self.features], self.dims,
                                                     num_samples, support_sizes1,
                                                     concat=self.concat, model_size=self.model_size)
    dim_mult = 2 if self.concat else 1

    #####################
    # [z]: OUTPUT LAYER #
    #####################
    self.outputs1 = tf.nn.l2_normalize(self.outputs1, 1)

    dim_mult = 2 if self.concat else 1
    # [z]: final output, predicts the class
    # [z]: self.num_classes = 121, self.dims = [50, 128, 128], dim_mult = 2
    self.node_pred = layers.Dense(dim_mult * self.dims[-1], self.num_classes,
                                  dropout=self.placeholders['dropout'], act=lambda x: x)

    # TF graph management
    # [z]: self.node_preds is in R^{? x 121}, where 121 is the number of classes
    self.node_preds = self.node_pred(self.outputs1)

    #####################
    # [z]: BACK PROP    #
    #####################
    self._loss()
    # [z]: backprop starts here: compute gradients of the loss
    grads_and_vars = self.optimizer.compute_gradients(self.loss)
    clipped_grads_and_vars = [(tf.clip_by_value(grad, -5.0, 5.0) if grad is not None else None, var)
                              for grad, var in grads_and_vars]
    self.grad, _ = clipped_grads_and_vars[0]
    # [z]: update the parameters by applying the clipped gradients
    self.opt_op = self.optimizer.apply_gradients(clipped_grads_and_vars)
    self.preds = self.predict()
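# ---------------------------------------------------------------------------
# Hedged sketch checking the shapes noted in the [z] annotations above: with a
# batch of 512 nodes and num_samples = [25, 10], the sampler walks the layers
# in reverse order, so the flattened sample arrays have sizes 512,
# 512 * 10 = 5120 and 512 * 10 * 25 = 128000. Pure-Python illustration,
# independent of the repository code.
# ---------------------------------------------------------------------------
def flattened_sample_sizes(batch_size, num_samples):
    """num_samples is ordered from the innermost to the outermost layer."""
    sizes = [batch_size]
    support = 1
    for n in reversed(num_samples):   # the outermost hop is sampled first
        support *= n
        sizes.append(support * batch_size)
    return sizes

assert flattened_sample_sizes(512, [25, 10]) == [512, 5120, 128000]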
def build(self):
    samples1, support_sizes1 = self.sample(self.inputs1, self.layer_infos)
    # additional samples
    samples2, support_sizes2 = self.sample(self.inputs1, self.layer_infos)
    samples3, support_sizes3 = self.sample(self.inputs1, self.layer_infos)
    samples4, support_sizes4 = self.sample(self.inputs1, self.layer_infos)
    samples5, support_sizes5 = self.sample(self.inputs1, self.layer_infos)
    num_samples = [layer_info.num_samples for layer_info in self.layer_infos]
    self.outputs1, self.aggregators = self.aggregate(samples1, [self.features], self.dims,
                                                     num_samples, support_sizes1,
                                                     concat=self.concat, model_size=self.model_size)
    # additional outputs
    self.outputs2, self.aggregators = self.aggregate(samples2, [self.features], self.dims,
                                                     num_samples, support_sizes2,
                                                     concat=self.concat, model_size=self.model_size)
    self.outputs3, self.aggregators = self.aggregate(samples3, [self.features], self.dims,
                                                     num_samples, support_sizes1,
                                                     concat=self.concat, model_size=self.model_size)
    self.outputs4, self.aggregators = self.aggregate(samples4, [self.features], self.dims,
                                                     num_samples, support_sizes1,
                                                     concat=self.concat, model_size=self.model_size)
    self.outputs5, self.aggregators = self.aggregate(samples5, [self.features], self.dims,
                                                     num_samples, support_sizes1,
                                                     concat=self.concat, model_size=self.model_size)

    # conv3 (disabled): combine three sampled outputs with a 1x1 convolution
    """
    self.outputs1 = tf.stack([self.outputs1, self.outputs2, self.outputs3], 2)
    self.outputs1 = tf.expand_dims(self.outputs1, 0)
    self.outputs1 = tf.layers.conv2d(inputs=self.outputs1,
                                     filters=1,
                                     kernel_size=[1, 1],
                                     activation='relu')
    self.outputs1 = tf.squeeze(self.outputs1)
    """

    # concat3 (disabled)
    # self.outputs1 = tf.concat([self.outputs1, self.outputs2, self.outputs3], 1)

    dim_mult = 1 * 2 if self.concat else 1
    # self.outputs1 is the embedding vector produced by the aggregation.
    self.outputs1 = tf.nn.l2_normalize(self.outputs1, 1)

    self.node_pred = layers.Dense(int((dim_mult * self.dims[-1] + self.num_classes) * 2 / 3),
                                  self.num_classes,
                                  dropout=self.placeholders['dropout'],
                                  act=lambda x: x)
    self.hidden_layer1 = layers.Dense(dim_mult * self.dims[-1],
                                      int((dim_mult * self.dims[-1] + self.num_classes) * 2 / 3),
                                      dropout=self.placeholders['dropout'],
                                      act=lambda x: x)
    self.hidden_layer2 = layers.Dense(int((dim_mult * self.dims[-1] + self.num_classes) * 2 / 3),
                                      int((dim_mult * self.dims[-1] + self.num_classes) * 2 / 3),
                                      dropout=self.placeholders['dropout'],
                                      act=lambda x: x)

    # TF graph management
    self.node_preds = self.hidden_layer1(self.outputs1)
    # self.node_preds = self.hidden_layer2(self.node_preds)
    # self.node_preds = self.hidden_layer2(self.node_preds)
    self.node_preds = self.node_pred(self.node_preds)

    self._loss()
    grads_and_vars = self.optimizer.compute_gradients(self.loss)
    clipped_grads_and_vars = [(tf.clip_by_value(grad, -5.0, 5.0) if grad is not None else None, var)
                              for grad, var in grads_and_vars]
    self.grad, _ = clipped_grads_and_vars[0]
    self.opt_op = self.optimizer.apply_gradients(clipped_grads_and_vars)
    self.preds = self.predict()
def __init__(self, placeholders, features, adj, degrees, layer_infos,
             concat=True, aggregator_type="mean", model_size="small",
             identity_dim=0, **kwargs):
    '''
    Args:
        - placeholders: Dictionary of TensorFlow placeholders.
        - features: Numpy array with node features.
              NOTE: Pass None to train in featureless mode (identity features for nodes)!
        - adj: Numpy array with adjacency lists (padded with random re-samples).
        - degrees: Numpy array with node degrees.
        - layer_infos: List of SAGEInfo namedtuples that describe the parameters of all
              the recursive layers. See SAGEInfo definition above.
        - concat: whether to concatenate during recursive iterations.
        - aggregator_type: how to aggregate neighbor information.
        - model_size: one of "small" or "big".
        - identity_dim: Set to a positive int to use identity features (slow and cannot
              generalize, but better accuracy).
    '''
    super(SampleAndAggregate, self).__init__(**kwargs)
    if aggregator_type == "mean":
        self.aggregator_cls = MeanAggregator
    elif aggregator_type == "seq":
        self.aggregator_cls = SeqAggregator
    elif aggregator_type == "maxpool":
        self.aggregator_cls = MaxPoolingAggregator
    elif aggregator_type == "meanpool":
        self.aggregator_cls = MeanPoolingAggregator
    elif aggregator_type == "gcn":
        self.aggregator_cls = GCNAggregator
    else:
        raise Exception("Unknown aggregator: " + aggregator_type)

    # get info from placeholders...
    self.inputs1 = placeholders["batch1"]
    self.inputs2 = placeholders["batch2"]
    # self.neg_samples = placeholders['neg_samples']
    self.model_size = model_size
    self.adj_info = adj
    if identity_dim > 0:
        self.embeds = tf.get_variable("node_embeddings",
                                      [adj.get_shape().as_list()[0], identity_dim])
    else:
        self.embeds = None
    if features is None:
        if identity_dim == 0:
            raise Exception("Must have a positive value for identity feature dimension "
                            "if no input features given.")
        self.features = self.embeds
    else:
        self.features = features
        if self.embeds is not None:
            self.features = tf.concat([self.embeds, self.features], axis=1)
    self.degrees = degrees
    self.concat = concat

    self.dims = [(0 if features is None else features.shape[1].value) + identity_dim]
    self.dims.extend([layer_infos[i].output_dim for i in range(len(layer_infos))])
    self.batch_size = placeholders["batch_size"]
    self.placeholders = placeholders
    self.layer_infos = layer_infos

    self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)

    dim_mult = 2 if self.concat else 1
    self.G1 = layers.Dense(dim_mult * self.dims[-1], dim_mult * self.dims[-1],
                           dropout=self.placeholders['dropout'], act=tf.nn.relu)
    self.G2 = layers.Dense(dim_mult * self.dims[-1], dim_mult * self.dims[-1],
                           dropout=self.placeholders['dropout'], act=lambda x: x, bias=False)

    self.build()
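# ---------------------------------------------------------------------------
# Hedged sketch of what the `adj` argument described in the docstring could
# look like: a dense [num_nodes, max_degree] numpy array in which each row
# holds a node's neighbor ids, padded by re-sampling with replacement. The
# helper name, the `adj_list` dict, and the self-loop default for isolated
# nodes are hypothetical; a table like this is typically built by the
# minibatch iterator before the model is constructed.
# ---------------------------------------------------------------------------
import numpy as np

def build_padded_adj(adj_list, num_nodes, max_degree, rng=None):
    rng = rng or np.random.default_rng()
    # Default every row to self-loops so isolated nodes still have valid entries.
    adj = np.tile(np.arange(num_nodes, dtype=np.int64)[:, None], (1, max_degree))
    for node, neighbors in adj_list.items():
        if len(neighbors) == 0:
            continue
        # Re-sample with replacement so every row has exactly max_degree entries.
        adj[node] = rng.choice(neighbors, size=max_degree, replace=True)
    return adj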