def __init__(self, node_type, edge_type, max_id, dim, walk_len, walk_num,
             win_size, num_negs):
  super(DeepWalk, self).__init__()
  self.node_type = node_type
  self.edge_type = edge_type
  self.max_id = max_id
  self.num_negs = num_negs
  self.walk_len = walk_len
  self.walk_num = walk_num
  self.left_win_size = win_size
  self.right_win_size = win_size
  self.dim = dim

  # pair_num: number of (target, context) pairs generated per source node.
  self.batch_size_ratio = \
      self.walk_num * int(euler_ops.gen_pair(
          tf.zeros([0, self.walk_len + 1], dtype=tf.int64),
          self.left_win_size,
          self.right_win_size).shape[1])
  print('batch_size_ratio={}'.format(self.batch_size_ratio))

  self.target_encoder = layers.Embedding(
      self.max_id + 1, self.dim,
      initializer=lambda: tf.truncated_normal_initializer(
          stddev=1.0 / (self.dim ** 0.5)))
  self.context_encoder = layers.Embedding(
      self.max_id + 1, self.dim,
      initializer=lambda: tf.truncated_normal_initializer(
          stddev=1.0 / (self.dim ** 0.5)))
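# Usage sketch: how the DeepWalk constructor above might be called. Assumes
# the DeepWalk class is in scope and an Euler graph has been initialized;
# all hyperparameter values below are illustrative placeholders.
example_deepwalk = DeepWalk(node_type=0,
                            edge_type=[0],
                            max_id=10000,
                            dim=128,
                            walk_len=3,
                            walk_num=5,
                            win_size=1,
                            num_negs=5)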
def __init__(self, metapath, dim, aggregator='mean',
             feature_idx=-1, feature_dim=0, max_id=-1,
             use_feature=True, use_id=False, use_residual=False, **kwargs):
  super(GCNEncoder, self).__init__(**kwargs)
  self.metapath = metapath
  self.num_layers = len(metapath)
  self.use_id = use_id
  self.use_feature = use_feature
  self.use_residual = use_residual
  if use_id:
    self.id_layer = layers.Embedding(max_id, dim)
  if use_feature and use_residual:
    self.feature_layer = layers.Dense(dim, use_bias=False)
  self.feature_idx = feature_idx
  self.feature_dim = feature_dim

  self.aggregators = []
  aggregator_class = sparse_aggregators.get(aggregator)
  for layer in range(self.num_layers):
    activation = tf.nn.relu if layer < self.num_layers - 1 else None
    self.aggregators.append(aggregator_class(dim, activation=activation))
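# Usage sketch: a two-layer GCNEncoder over a single edge type, reading a
# 50-dimensional dense feature. The metapath holds one list of edge types
# per layer; all values below are illustrative placeholders.
example_gcn_encoder = GCNEncoder(metapath=[[0], [0]],
                                 dim=32,
                                 aggregator='mean',
                                 feature_idx=0,
                                 feature_dim=50)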
def __init__(self, node_type, edge_type, max_id, dim, order=1,
             *args, **kwargs):
  super(LINE, self).__init__(node_type, edge_type, max_id, *args, **kwargs)
  self.target_embedding = layers.Embedding(name='target_embedding',
                                           max_id=max_id + 1, dim=dim)
  if order == 1:
    self.context_embedding = self.target_embedding
  elif order == 2:
    self.context_embedding = layers.Embedding(name='context_embedding',
                                              max_id=max_id + 1, dim=dim)
  else:
    raise ValueError('LINE order must be 1 or 2, got {}.'.format(order))
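# Usage sketch: a second-order LINE model, which keeps separate target and
# context embedding tables (order=1 would share a single table). xent_loss
# and num_negs are forwarded to the parent class via **kwargs, mirroring the
# run_network_embedding call below; all values are illustrative placeholders.
example_line = LINE(node_type=0,
                    edge_type=[0],
                    max_id=10000,
                    dim=128,
                    order=2,
                    xent_loss=True,
                    num_negs=5)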
def __init__(self, dim, feature_idx=-1, feature_dim=0, max_id=-1,
             use_feature=True, use_id=False, **kwargs):
  super(ShallowEncoder, self).__init__(**kwargs)
  if not use_feature and not use_id:
    raise ValueError('Either use_feature or use_id must be True.')

  self.dim = dim
  self.use_id = use_id
  self.use_feature = use_feature

  if use_id:
    self.embedding = layers.Embedding(max_id, dim)
  if use_feature:
    self.dense = layers.Dense(self.dim)
  self.feature_idx = feature_idx
  self.feature_dim = feature_dim
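# Usage sketch: two ways the ShallowEncoder above might be built, either
# from a dense node feature or from a per-node id embedding. Indices and
# sizes are illustrative placeholders.
example_feature_encoder = ShallowEncoder(dim=128,
                                         feature_idx=0,
                                         feature_dim=50)
example_id_encoder = ShallowEncoder(dim=128,
                                    use_feature=False,
                                    use_id=True,
                                    max_id=10000)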
def __init__(self, metapath, fanouts, dim,
             aggregator='mean', concat=False, shared_aggregators=None,
             feature_idx=-1, feature_dim=0, max_id=-1,
             use_feature=True, use_id=False, **kwargs):
  super(SageEncoder, self).__init__(**kwargs)
  if len(metapath) != len(fanouts):
    raise ValueError('Len of metapath must be the same as fanouts.')

  self.metapath = metapath
  self.fanouts = fanouts
  self.num_layers = len(metapath)
  self.concat = concat

  layer0_dim = (feature_dim if use_feature else 0) + (dim if use_id else 0)
  self.dims = [layer0_dim] + [dim] * self.num_layers

  self.use_id = use_id
  self.use_feature = use_feature
  if use_id:
    self.embedding = layers.Embedding(max_id, dim)
  self.feature_idx = feature_idx
  self.feature_dim = feature_dim

  if shared_aggregators is not None:
    self.aggregators = shared_aggregators
  else:
    self.aggregators = self.create_aggregators(
        dim, self.num_layers, aggregator, concat=concat)
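# Usage sketch: a two-layer SageEncoder that samples 10 neighbours per layer
# along a single edge type. metapath and fanouts must have the same length;
# all values below are illustrative placeholders.
example_sage_encoder = SageEncoder(metapath=[[0], [0]],
                                   fanouts=[10, 10],
                                   dim=128,
                                   aggregator='mean',
                                   concat=True,
                                   feature_idx=0,
                                   feature_dim=50)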
def run_network_embedding(flags_obj, master, is_chief):
  fanouts = list(map(int, flags_obj.fanouts))
  if flags_obj.mode == 'train':
    metapath = [list(map(int, flags_obj.train_edge_type))] * len(fanouts)
  else:
    metapath = [list(map(int, flags_obj.all_edge_type))] * len(fanouts)

  if flags_obj.model == 'line':
    model = models.LINE(node_type=flags_obj.all_node_type,
                        edge_type=flags_obj.all_edge_type,
                        max_id=flags_obj.max_id,
                        dim=flags_obj.dim,
                        xent_loss=flags_obj.xent_loss,
                        num_negs=flags_obj.num_negs,
                        order=flags_obj.order)

  elif flags_obj.model in ['randomwalk', 'deepwalk', 'node2vec']:
    model = models.Node2Vec(node_type=flags_obj.all_node_type,
                            edge_type=flags_obj.all_edge_type,
                            max_id=flags_obj.max_id,
                            dim=flags_obj.dim,
                            xent_loss=flags_obj.xent_loss,
                            num_negs=flags_obj.num_negs,
                            walk_len=flags_obj.walk_len,
                            walk_p=flags_obj.walk_p,
                            walk_q=flags_obj.walk_q,
                            left_win_size=flags_obj.left_win_size,
                            right_win_size=flags_obj.right_win_size)

  elif flags_obj.model in ['gcn', 'gcn_supervised']:
    model = models.SupervisedGCN(label_idx=flags_obj.label_idx,
                                 label_dim=flags_obj.label_dim,
                                 num_classes=flags_obj.num_classes,
                                 sigmoid_loss=flags_obj.sigmoid_loss,
                                 metapath=metapath,
                                 dim=flags_obj.dim,
                                 aggregator=flags_obj.aggregator,
                                 feature_idx=flags_obj.feature_idx,
                                 feature_dim=flags_obj.feature_dim,
                                 use_residual=flags_obj.use_residual)

  elif flags_obj.model == 'scalable_gcn':
    model = models.ScalableGCN(label_idx=flags_obj.label_idx,
                               label_dim=flags_obj.label_dim,
                               num_classes=flags_obj.num_classes,
                               sigmoid_loss=flags_obj.sigmoid_loss,
                               edge_type=metapath[0],
                               num_layers=len(fanouts),
                               dim=flags_obj.dim,
                               aggregator=flags_obj.aggregator,
                               feature_idx=flags_obj.feature_idx,
                               feature_dim=flags_obj.feature_dim,
                               max_id=flags_obj.max_id,
                               use_residual=flags_obj.use_residual,
                               store_learning_rate=flags_obj.store_learning_rate,
                               store_init_maxval=flags_obj.store_init_maxval)

  elif flags_obj.model == 'graphsage':
    model = models.GraphSage(node_type=flags_obj.train_node_type,
                             edge_type=flags_obj.train_edge_type,
                             max_id=flags_obj.max_id,
                             xent_loss=flags_obj.xent_loss,
                             num_negs=flags_obj.num_negs,
                             metapath=metapath,
                             fanouts=fanouts,
                             dim=flags_obj.dim,
                             aggregator=flags_obj.aggregator,
                             concat=flags_obj.concat,
                             feature_idx=flags_obj.feature_idx,
                             feature_dim=flags_obj.feature_dim)

  elif flags_obj.model == 'graphsage_supervised':
    model = models.SupervisedGraphSage(label_idx=flags_obj.label_idx,
                                       label_dim=flags_obj.label_dim,
                                       num_classes=flags_obj.num_classes,
                                       sigmoid_loss=flags_obj.sigmoid_loss,
                                       metapath=metapath,
                                       fanouts=fanouts,
                                       dim=flags_obj.dim,
                                       aggregator=flags_obj.aggregator,
                                       concat=flags_obj.concat,
                                       feature_idx=flags_obj.feature_idx,
                                       feature_dim=flags_obj.feature_dim)

  elif flags_obj.model == 'scalable_sage':
    model = models.ScalableSage(label_idx=flags_obj.label_idx,
                                label_dim=flags_obj.label_dim,
                                num_classes=flags_obj.num_classes,
                                sigmoid_loss=flags_obj.sigmoid_loss,
                                edge_type=metapath[0],
                                fanout=fanouts[0],
                                num_layers=len(fanouts),
                                dim=flags_obj.dim,
                                aggregator=flags_obj.aggregator,
                                concat=flags_obj.concat,
                                feature_idx=flags_obj.feature_idx,
                                feature_dim=flags_obj.feature_dim,
                                max_id=flags_obj.max_id,
                                store_learning_rate=flags_obj.store_learning_rate,
                                store_init_maxval=flags_obj.store_init_maxval)

  elif flags_obj.model == 'gat':
    model = models.GAT(label_idx=flags_obj.label_idx,
                       label_dim=flags_obj.label_dim,
                       num_classes=flags_obj.num_classes,
                       sigmoid_loss=flags_obj.sigmoid_loss,
                       feature_idx=flags_obj.feature_idx,
                       feature_dim=flags_obj.feature_dim,
                       max_id=flags_obj.max_id,
                       head_num=flags_obj.head_num,
                       hidden_dim=flags_obj.dim,
                       nb_num=5)
  elif flags_obj.model == 'lshne':
    model = models.LsHNE(-1, [[[0, 0, 0], [0, 0, 0]]], -1, 128,
                         [1, 1], [1, 1])

  elif flags_obj.model == 'saved_embedding':
    embedding_val = np.load(
        os.path.join(flags_obj.model_dir, 'embedding.npy'))
    embedding = layers.Embedding(
        max_id=flags_obj.max_id, dim=flags_obj.dim,
        initializer=lambda: tf.constant_initializer(embedding_val))
    model = models.SupervisedModel(flags_obj.label_idx,
                                   flags_obj.label_dim,
                                   flags_obj.num_classes,
                                   sigmoid_loss=flags_obj.sigmoid_loss)
    model.encoder = lambda inputs: tf.stop_gradient(embedding(inputs))

  else:
    raise ValueError('Unsupported network embedding model.')

  if flags_obj.mode == 'train':
    run_train(model, flags_obj, master, is_chief)
  elif flags_obj.mode == 'evaluate':
    run_evaluate(model, flags_obj, master, is_chief)
  elif flags_obj.mode == 'save_embedding':
    run_save_embedding(model, flags_obj, master, is_chief)