Example #1
0
    def __init__(self, node_type, edge_type, max_id, dim, walk_len, walk_num,
                 win_size, num_negs):
        """Set up DeepWalk skip-gram state and its two embedding tables.

        Args:
            node_type: node type(s) to sample walk roots from.
            edge_type: edge type(s) the random walk may traverse.
            max_id: largest node id; embedding tables hold max_id + 1 rows.
            dim: embedding dimensionality.
            walk_len: number of steps per random walk.
            walk_num: walks started per root node.
            win_size: skip-gram window radius, applied on both sides.
            num_negs: negative samples per positive pair.
        """
        super(DeepWalk, self).__init__()

        self.node_type = node_type
        self.edge_type = edge_type
        self.max_id = max_id
        self.num_negs = num_negs
        self.walk_len = walk_len
        self.walk_num = walk_num
        # Symmetric context window around each walk position.
        self.left_win_size = win_size
        self.right_win_size = win_size
        self.dim = dim

        # Probe gen_pair with an empty batch of walks (shape [0, steps])
        # to learn how many (target, context) pairs one walk produces;
        # pairs per sampled root = pairs-per-walk * walks-per-root.
        dummy_walks = tf.zeros([0, self.walk_len + 1], dtype=tf.int64)
        pairs_per_walk = int(
            euler_ops.gen_pair(dummy_walks, self.left_win_size,
                               self.right_win_size).shape[1])
        self.batch_size_ratio = self.walk_num * pairs_per_walk
        print('batch_size_ratio={}'.format(self.batch_size_ratio))

        def _new_table():
            # Truncated normal with stddev 1/sqrt(dim) keeps initial
            # dot products at unit scale.
            init = lambda: tf.truncated_normal_initializer(
                stddev=1.0 / (self.dim**0.5))
            return layers.Embedding(self.max_id + 1, self.dim,
                                    initializer=init)

        self.target_encoder = _new_table()
        self.context_encoder = _new_table()
Example #2
0
    def __init__(self,
                 metapath,
                 dim,
                 aggregator='mean',
                 feature_idx=-1,
                 feature_dim=0,
                 max_id=-1,
                 use_feature=True,
                 use_id=False,
                 use_residual=False,
                 **kwargs):
        """GCN encoder over a fixed metapath, one sparse aggregator per hop.

        Args:
            metapath: sequence of edge types; its length sets layer count.
            dim: hidden/output dimensionality of every aggregator.
            aggregator: key looked up in ``sparse_aggregators``.
            feature_idx: index of the dense feature to read (-1 = none).
            feature_dim: dimensionality of that dense feature.
            max_id: embedding-table size when ``use_id`` is set.
            use_feature: whether to consume node features.
            use_id: whether to learn a per-id embedding.
            use_residual: whether residual connections are used downstream.
        """
        super(GCNEncoder, self).__init__(**kwargs)
        self.metapath = metapath
        self.num_layers = len(metapath)

        self.use_id = use_id
        self.use_feature = use_feature
        self.use_residual = use_residual
        if use_id:
            self.id_layer = layers.Embedding(max_id, dim)
        # NOTE(review): the feature projection is built only when residual
        # connections are enabled — presumably features must be mapped to
        # `dim` for the skip connection; confirm against the forward pass.
        if use_feature and use_residual:
            self.feature_layer = layers.Dense(dim, use_bias=False)
        self.feature_idx = feature_idx
        self.feature_dim = feature_dim

        aggregator_class = sparse_aggregators.get(aggregator)
        # ReLU on every hop except the last; the final hop stays linear.
        self.aggregators = [
            aggregator_class(
                dim,
                activation=tf.nn.relu if hop < self.num_layers - 1 else None)
            for hop in range(self.num_layers)
        ]
Example #3
0
    def __init__(self,
                 node_type,
                 edge_type,
                 max_id,
                 dim,
                 order=1,
                 *args,
                 **kwargs):
        """LINE embedding model.

        Args:
            node_type: node type(s) passed through to the base model.
            edge_type: edge type(s) passed through to the base model.
            max_id: largest node id; tables hold max_id + 1 rows.
            dim: embedding dimensionality.
            order: 1 shares a single table for target and context
                (first-order proximity); 2 learns a separate context table
                (second-order proximity).

        Raises:
            ValueError: if ``order`` is neither 1 nor 2.
        """
        super(LINE, self).__init__(node_type, edge_type, max_id, *args,
                                   **kwargs)

        self.target_embedding = layers.Embedding(name='target_embedding',
                                                 max_id=max_id + 1,
                                                 dim=dim)
        if order == 1:
            # First-order proximity: context shares the target table.
            self.context_embedding = self.target_embedding
        elif order == 2:
            self.context_embedding = layers.Embedding(name='context_embedding',
                                                      max_id=max_id + 1,
                                                      dim=dim)
        else:
            # Fixed: original message had a stray trailing ':' after the
            # formatted value ("got {}:").
            raise ValueError(
                'LINE order must be 1 or 2, got {}'.format(order))
Example #4
0
 def __init__(self,
              dim,
              feature_idx=-1,
              feature_dim=0,
              max_id=-1,
              use_feature=True,
              use_id=False,
              **kwargs):
     """Shallow encoder: id-table embedding, dense feature projection, or both.

     Args:
         dim: output dimensionality.
         feature_idx: index of the dense feature to read (-1 = none).
         feature_dim: dimensionality of that dense feature.
         max_id: embedding-table size when ``use_id`` is set.
         use_feature: whether to project node features with a Dense layer.
         use_id: whether to learn a per-id embedding.

     Raises:
         ValueError: if both ``use_feature`` and ``use_id`` are False.
     """
     super(ShallowEncoder, self).__init__(**kwargs)
     if not use_feature and not use_id:
         raise ValueError('Either use_feature or use_id must be True.')
     self.dim = dim
     # Fixed: originally assigned `use_feature` here, so self.use_id
     # silently tracked the feature flag instead of the id flag.
     self.use_id = use_id
     self.use_feature = use_feature
     if use_id:
         # Fixed swapped arguments: Embedding is called as (max_id, dim)
         # everywhere else in this file.
         self.embedding = layers.Embedding(max_id, dim)
     if use_feature:
         self.dense = layers.Dense(self.dim)
     self.feature_idx = feature_idx
     self.feature_dim = feature_dim
Example #5
0
    def __init__(self,
                 metapath,
                 fanouts,
                 dim,
                 aggregator='mean',
                 concat=False,
                 shared_aggregators=None,
                 feature_idx=-1,
                 feature_dim=0,
                 max_id=-1,
                 use_feature=True,
                 use_id=False,
                 **kwargs):
        """GraphSAGE encoder: one neighbor-sampling aggregator per hop.

        Args:
            metapath: edge type per hop; must match ``fanouts`` in length.
            fanouts: neighbors sampled at each hop.
            dim: hidden/output dimensionality of each aggregator.
            aggregator: aggregator family name.
            concat: whether aggregators concatenate self and neighbor parts.
            shared_aggregators: pre-built aggregators to reuse, or None to
                create fresh ones.
            feature_idx: index of the dense feature to read (-1 = none).
            feature_dim: dimensionality of that dense feature.
            max_id: embedding-table size when ``use_id`` is set.
            use_feature: whether to consume node features.
            use_id: whether to learn a per-id embedding.

        Raises:
            ValueError: if ``metapath`` and ``fanouts`` differ in length.
        """
        super(SageEncoder, self).__init__(**kwargs)
        if len(metapath) != len(fanouts):
            raise ValueError('Len of metapath must be the same as fanouts.')
        self.metapath = metapath
        self.fanouts = fanouts
        self.num_layers = len(metapath)
        self.concat = concat

        # Layer-0 input width: features and/or id embedding, as enabled.
        input_dim = 0
        if use_feature:
            input_dim += feature_dim
        if use_id:
            input_dim += dim
        self.dims = [input_dim] + [dim] * self.num_layers

        self.use_id = use_id
        self.use_feature = use_feature
        if use_id:
            self.embedding = layers.Embedding(max_id, dim)
        self.feature_idx = feature_idx
        self.feature_dim = feature_dim

        if shared_aggregators is None:
            self.aggregators = self.create_aggregators(dim,
                                                       self.num_layers,
                                                       aggregator,
                                                       concat=concat)
        else:
            # Reuse aggregators supplied by the caller (weight sharing).
            self.aggregators = shared_aggregators
Example #6
0
def run_network_embedding(flags_obj, master, is_chief):
    """Build the model named by ``flags_obj.model`` and run the chosen mode.

    Args:
        flags_obj: parsed command-line flags describing the model, graph
            types, dimensions, and run mode.
        master: master address forwarded to the run_* helpers.
        is_chief: whether this worker is the chief, forwarded likewise.

    Raises:
        ValueError: if ``flags_obj.model`` is not a recognized model name.
    """
    # Fixed: on Python 3 `map` returns a one-shot iterator, which broke
    # len(fanouts), fanouts[0], and the reuse of a single map object
    # replicated inside `metapath`. Materialize as lists up front.
    fanouts = list(map(int, flags_obj.fanouts))
    if flags_obj.mode == 'train':
        metapath = [list(map(int, flags_obj.train_edge_type))] * len(fanouts)
    else:
        metapath = [list(map(int, flags_obj.all_edge_type))] * len(fanouts)

    if flags_obj.model == 'line':
        model = models.LINE(node_type=flags_obj.all_node_type,
                            edge_type=flags_obj.all_edge_type,
                            max_id=flags_obj.max_id,
                            dim=flags_obj.dim,
                            xent_loss=flags_obj.xent_loss,
                            num_negs=flags_obj.num_negs,
                            order=flags_obj.order)

    elif flags_obj.model in ['randomwalk', 'deepwalk', 'node2vec']:
        model = models.Node2Vec(node_type=flags_obj.all_node_type,
                                edge_type=flags_obj.all_edge_type,
                                max_id=flags_obj.max_id,
                                dim=flags_obj.dim,
                                xent_loss=flags_obj.xent_loss,
                                num_negs=flags_obj.num_negs,
                                walk_len=flags_obj.walk_len,
                                walk_p=flags_obj.walk_p,
                                walk_q=flags_obj.walk_q,
                                left_win_size=flags_obj.left_win_size,
                                right_win_size=flags_obj.right_win_size)

    elif flags_obj.model in ['gcn', 'gcn_supervised']:
        model = models.SupervisedGCN(label_idx=flags_obj.label_idx,
                                     label_dim=flags_obj.label_dim,
                                     num_classes=flags_obj.num_classes,
                                     sigmoid_loss=flags_obj.sigmoid_loss,
                                     metapath=metapath,
                                     dim=flags_obj.dim,
                                     aggregator=flags_obj.aggregator,
                                     feature_idx=flags_obj.feature_idx,
                                     feature_dim=flags_obj.feature_dim,
                                     use_residual=flags_obj.use_residual)

    elif flags_obj.model == 'scalable_gcn':
        model = models.ScalableGCN(
            label_idx=flags_obj.label_idx,
            label_dim=flags_obj.label_dim,
            num_classes=flags_obj.num_classes,
            sigmoid_loss=flags_obj.sigmoid_loss,
            edge_type=metapath[0],
            num_layers=len(fanouts),
            dim=flags_obj.dim,
            aggregator=flags_obj.aggregator,
            feature_idx=flags_obj.feature_idx,
            feature_dim=flags_obj.feature_dim,
            max_id=flags_obj.max_id,
            use_residual=flags_obj.use_residual,
            store_learning_rate=flags_obj.store_learning_rate,
            store_init_maxval=flags_obj.store_init_maxval)

    elif flags_obj.model == 'graphsage':
        model = models.GraphSage(node_type=flags_obj.train_node_type,
                                 edge_type=flags_obj.train_edge_type,
                                 max_id=flags_obj.max_id,
                                 xent_loss=flags_obj.xent_loss,
                                 num_negs=flags_obj.num_negs,
                                 metapath=metapath,
                                 fanouts=fanouts,
                                 dim=flags_obj.dim,
                                 aggregator=flags_obj.aggregator,
                                 concat=flags_obj.concat,
                                 feature_idx=flags_obj.feature_idx,
                                 feature_dim=flags_obj.feature_dim)

    elif flags_obj.model == 'graphsage_supervised':
        model = models.SupervisedGraphSage(label_idx=flags_obj.label_idx,
                                           label_dim=flags_obj.label_dim,
                                           num_classes=flags_obj.num_classes,
                                           sigmoid_loss=flags_obj.sigmoid_loss,
                                           metapath=metapath,
                                           fanouts=fanouts,
                                           dim=flags_obj.dim,
                                           aggregator=flags_obj.aggregator,
                                           concat=flags_obj.concat,
                                           feature_idx=flags_obj.feature_idx,
                                           feature_dim=flags_obj.feature_dim)

    elif flags_obj.model == 'scalable_sage':
        model = models.ScalableSage(
            label_idx=flags_obj.label_idx,
            label_dim=flags_obj.label_dim,
            num_classes=flags_obj.num_classes,
            sigmoid_loss=flags_obj.sigmoid_loss,
            edge_type=metapath[0],
            fanout=fanouts[0],
            num_layers=len(fanouts),
            dim=flags_obj.dim,
            aggregator=flags_obj.aggregator,
            concat=flags_obj.concat,
            feature_idx=flags_obj.feature_idx,
            feature_dim=flags_obj.feature_dim,
            max_id=flags_obj.max_id,
            store_learning_rate=flags_obj.store_learning_rate,
            store_init_maxval=flags_obj.store_init_maxval)

    elif flags_obj.model == 'gat':
        model = models.GAT(label_idx=flags_obj.label_idx,
                           label_dim=flags_obj.label_dim,
                           num_classes=flags_obj.num_classes,
                           sigmoid_loss=flags_obj.sigmoid_loss,
                           feature_idx=flags_obj.feature_idx,
                           feature_dim=flags_obj.feature_dim,
                           max_id=flags_obj.max_id,
                           head_num=flags_obj.head_num,
                           hidden_dim=flags_obj.dim,
                           nb_num=5)

    elif flags_obj.model == 'lshne':
        # Hard-coded demo configuration; not driven by flags.
        model = models.LsHNE(-1, [[[0, 0, 0], [0, 0, 0]]], -1, 128, [1, 1],
                             [1, 1])

    elif flags_obj.model == 'saved_embedding':
        # Load a frozen embedding matrix and wrap it as a fixed encoder;
        # stop_gradient keeps it untrainable.
        embedding_val = np.load(
            os.path.join(flags_obj.model_dir, 'embedding.npy'))
        embedding = layers.Embedding(
            max_id=flags_obj.max_id,
            dim=flags_obj.dim,
            initializer=lambda: tf.constant_initializer(embedding_val))
        model = models.SupervisedModel(flags_obj.label_idx,
                                       flags_obj.label_dim,
                                       flags_obj.num_classes,
                                       sigmoid_loss=flags_obj.sigmoid_loss)
        model.encoder = lambda inputs: tf.stop_gradient(embedding(inputs))

    else:
        raise ValueError('Unsupported network embedding model.')

    # NOTE(review): an unrecognized mode falls through silently here;
    # consider raising, matching the model check above.
    if flags_obj.mode == 'train':
        run_train(model, flags_obj, master, is_chief)
    elif flags_obj.mode == 'evaluate':
        run_evaluate(model, flags_obj, master, is_chief)
    elif flags_obj.mode == 'save_embedding':
        run_save_embedding(model, flags_obj, master, is_chief)