Esempio n. 1
0
   def build_node_from_tensors(feature_schema, tensors):
       """Assembles a node `Data` object from a stream of tensors.

       Tensors are pulled from `tensors` with `next()` in a fixed order:
       int attributes, float attributes, string attributes (each one only
       when the decoder reports a positive count for that kind), and
       finally the ids.

       Args:
         feature_schema: A (name, Decoder) tuple; only the Decoder at index 1
           is read here (`int_attr_num`, `float_attr_num`,
           `string_attr_num`).
         tensors: An iterator yielding the tensors in the order described
           above.
       Returns:
         A `Data` object in Tensor format.
       """
       # The consumption order below must stay int -> float -> string -> ids,
       # because every value comes from the same iterator.
       int_attrs = next(tensors) if feature_schema[1].int_attr_num > 0 else None
       float_attrs = (next(tensors)
                      if feature_schema[1].float_attr_num > 0 else None)
       string_attrs = (next(tensors)
                       if feature_schema[1].string_attr_num > 0 else None)
       ids = next(tensors)
       return Data(ids,
                   ints=int_attrs,
                   floats=float_attrs,
                   strings=string_attrs)
Esempio n. 2
0
    def test_only_ints_with_fusion(self):
        """Checks an int-only `FeatureHandler` built with fused embeddings.

        Ten sparse int features are registered (eight with a random
        dimension in [8, 10], two hashed ones with dimension 4). The test
        verifies how the handler groups them and that the output for a
        batch of two rows has shape [2, total_dim].
        """
        spec = FeatureSpec(10)
        total_dim = 0
        for i in range(8):
            dim = random.randint(8, 10)
            spec.append_sparse(100 + 10 * i, dim, False)
            total_dim += dim

        spec.append_sparse(100, 4, True)  # two features need hash
        spec.append_sparse(100, 4, True)
        total_dim += 8

        handler = FeatureHandler("ints_with_fusion", spec, fuse_embedding=True)
        self.assertEqual(len(handler._float_fg), 0)
        # Dedicated comparison assertions report both operands on failure,
        # unlike assertEqual(<bool expression>, True).
        self.assertLess(len(handler._int_fg), 10)
        self.assertGreater(len(handler._fused_int_fg), 0)
        self.assertEqual(len(handler._string_fg), 0)

        batch_ints = np.array([[i, 2 * i]
                               for i in range(10)]).transpose()  # [2, 10]
        input_data = Data(
            ints=tf.convert_to_tensor(batch_ints, dtype=tf.int64))
        output = handler(input_data)  # [2, total_dim]

        with tf.Session() as sess:
            sess.run(tf.local_variables_initializer())
            sess.run(tf.global_variables_initializer())
            ret = sess.run(output)
            self.assertListEqual(list(ret.shape),
                                 [2, total_dim])  # 2d array, batch_size = 2
Esempio n. 3
0
    def _build_data(cls, graphs, type='node'):
        """Merges the node fields of several graphs into one flat `Data`.

        Args:
          graphs: Iterable of objects exposing a `nodes` attribute with
            `ids`, `int_attrs`, `float_attrs` and `string_attrs`.
          type: Only 'node' is handled. Any other value makes the method
            return None as soon as the first graph is visited.
        Returns:
          `Data(ids, ints, floats, strings)`, where every field is the
          axis-0 concatenation of the non-None per-graph values, or None
          when no graph supplied that field. Returns None instead when
          `type` is not 'node' and `graphs` is non-empty.
        """
        def keep_if_present(bucket, value):
            # Skip missing fields so they do not reach np.concatenate.
            if value is not None:
                bucket.append(value)

        def merge(parts):
            # An empty bucket means the field was absent from every graph.
            return np.concatenate(parts, axis=0) if parts else None

        #TODO(baole): support other labels and weights.
        collected_ids = []
        collected_ints = []
        collected_floats = []
        collected_strings = []
        for graph in graphs:
            if type != 'node':
                return None
            nodes = graph.nodes
            # flatten format.
            keep_if_present(collected_ids, nodes.ids)
            keep_if_present(collected_ints, nodes.int_attrs)
            keep_if_present(collected_floats, nodes.float_attrs)
            keep_if_present(collected_strings, nodes.string_attrs)
        return Data(merge(collected_ids),
                    merge(collected_ints),
                    merge(collected_floats),
                    merge(collected_strings))
Esempio n. 4
0
    def test_floats_and_fused_ints(self):
        """Checks a `FeatureHandler` mixing dense floats and sparse ints.

        Three dense features and seven sparse int features (random
        dimension in [8, 10]) are registered. No `fuse_embedding` argument
        is passed, so the fused-group expectation below relies on the
        handler's default (presumably fusion enabled; confirm against
        `FeatureHandler`). The output for a batch of two rows must have
        shape [2, total_dim].
        """
        spec = FeatureSpec(10)
        for _ in range(3):
            spec.append_dense()

        total_dim = 3  # the three dense features are counted as one each
        for i in range(7):
            dim = random.randint(8, 10)
            spec.append_sparse(20 + 10 * i, dim, False)
            total_dim += dim

        handler = FeatureHandler("floats_and_fused_ints", spec)
        self.assertEqual(len(handler._float_fg), 3)
        self.assertEqual(len(handler._int_fg), 0)
        # assertGreater reports the actual length on failure, unlike
        # assertEqual(<bool expression>, True).
        self.assertGreater(len(handler._fused_int_fg), 0)
        self.assertEqual(len(handler._string_fg), 0)

        batch_floats = np.array([[1.0 * i, 2.0 * i] for i in range(3)],
                                dtype=np.float32).transpose()  # [2, 3]
        batch_ints = np.array([[i, 2 * i]
                               for i in range(7)]).transpose()  # [2, 7]
        input_data = Data(floats=tf.convert_to_tensor(batch_floats,
                                                      dtype=tf.float32),
                          ints=tf.convert_to_tensor(batch_ints,
                                                    dtype=tf.int64))
        output = handler(input_data)

        with tf.Session() as sess:
            sess.run(tf.local_variables_initializer())
            sess.run(tf.global_variables_initializer())
            ret = sess.run(output)
            self.assertListEqual(list(ret.shape), [2, total_dim])
Esempio n. 5
0
    def test_only_strings(self):
        """Checks a `FeatureHandler` built from multi-valued strings only.

        Three multi-valued features (random dimension in [8, 10], values
        separated by ",") are registered. The output for a batch of two
        rows must have shape [2, total_dim].
        """
        spec = FeatureSpec(3)
        dims = []
        for feature_idx in range(3):
            dim = random.randint(8, 10)
            spec.append_multival(10 + 10 * feature_idx, dim, ",")
            dims.append(dim)
        total_dim = sum(dims)

        handler = FeatureHandler("only_strings", spec)
        self.assertEqual(len(handler._float_fg), 0)
        self.assertEqual(len(handler._int_fg), 0)
        self.assertEqual(len(handler._fused_int_fg), 0)
        self.assertEqual(len(handler._string_fg), 3)

        # One inner list per feature; transposing yields [batch=2, features=3].
        per_feature_values = [
            ["f1,batch1", "f1,batch2,others"],
            ["f2,batch1,others", "f2,batch2"],
            ["f3,batch1,haha", "f3,batch2,hh,kk"],
        ]
        batch_ss = np.array(per_feature_values).transpose()  # [2, 3]
        string_tensor = tf.convert_to_tensor(batch_ss, dtype=tf.string)
        input_data = Data(strings=string_tensor)
        output = handler(input_data)  # [2, total_dim]

        with tf.Session() as sess:
            sess.run(tf.local_variables_initializer())
            sess.run(tf.global_variables_initializer())
            ret = sess.run(output)
            expected_shape = [2, total_dim]  # 2d array, batch_size = 2
            self.assertListEqual(list(ret.shape), expected_shape)
def induce_graph_with_edge(src_nodes, dst_nodes, 
  src_nbrs, dst_nbrs):
  """Induces one SubGraph per target edge from the edge and its neighbors.

  For every index i, the i-th source and destination nodes are combined
  with their neighbors into a node `Data`, and an edge index is built with
  `gen_edge_index`.

  Args:
    src_nodes: A gl.Nodes instance with shape [batch_size].
    dst_nodes: A gl.Nodes instance with shape [batch_size] or 
      [batch_size, 1] for negative sample.
    src_nbrs: The src_nodes' full neighbors with 1D shape.
    dst_nbrs: The dst_nodes' full neighbors with 1D shape.
  Returns:
    A list of SubGraphs, one for each index in `src_nodes.ids`.
  """
  subgraphs = []
  # Running start positions of the current node's neighbors inside the
  # flattened src_nbrs / dst_nbrs; advanced by `offsets[i]` at the end of
  # every iteration.
  src_offset, dst_offset = 0,0
  for i in range (src_nodes.ids.size):
    # induce k-hop enclosing SubGraph of target edge.
    ids = np.array([src_nodes.ids[i],
                    dst_nodes.ids[i].reshape([-1])]) # neg dst is 2D.
    # Attributes of the target (src, dst) pair; the merge itself is done by
    # _get_target_attrs, which is defined outside this block.
    int_attrs = _get_target_attrs(src_nodes.int_attrs, dst_nodes.int_attrs, i)
    float_attrs = _get_target_attrs(src_nodes.float_attrs, dst_nodes.float_attrs, i)
    string_attrs = _get_target_attrs(src_nodes.string_attrs, dst_nodes.string_attrs, i)

    row, col = [], []
    # Number of target ids, captured before any neighbor is appended. It is
    # passed to gen_edge_index as the column offset (presumably the position
    # of the first neighbor in the concatenated ids; confirm against
    # gen_edge_index).
    col_offset = ids.size
    # `offsets[i]` is used as the neighbor count of the i-th node, so
    # [begin, end) is this node's slice of the flattened neighbors.
    src_begin, src_end = src_offset, src_offset + src_nbrs.offsets[i]
    dst_begin, dst_end = dst_offset, dst_offset + dst_nbrs.offsets[i]
    # Source neighbors are merged first, then destination neighbors; the
    # col_offset bookkeeping below relies on this order.
    ids, int_attrs, float_attrs, string_attrs = \
      _concat_node_with_nbr(ids, int_attrs, float_attrs, string_attrs,
                            src_nbrs, src_begin, src_end)
    ids, int_attrs, float_attrs, string_attrs = \
      _concat_node_with_nbr(ids, int_attrs, float_attrs, string_attrs,
                            dst_nbrs, dst_begin, dst_end)
    row, col = gen_edge_index(i, src_nbrs, col_offset,
        row, col, src=True)
    # Skip past the source neighbors so that the destination neighbors'
    # columns start after them.
    col_offset += src_nbrs.offsets[i]
    row, col = gen_edge_index(i, dst_nbrs, col_offset,
        row, col, src=False)

    src_offset += src_nbrs.offsets[i]
    dst_offset += dst_nbrs.offsets[i]
    # The edge index stacks `row` over `col` along axis 0.
    subgraph = SubGraph(np.stack([np.array(row),
                                  np.array(col)], axis=0),
                        Data(ids,
                             ints=int_attrs,
                             floats=float_attrs,
                             strings=string_attrs))
    subgraphs.append(subgraph)
  return subgraphs
Esempio n. 7
0
  def build_data_dict(self, flatten_values):
    """Rebuilds one `Data` per alias from a flat list of values.

    For every alias in `self._masks`, the nested mask lists are flattened
    into eight flags, in the order ints, floats, strings, labels, weights,
    ids, dst_ids, offsets. A truthy flag takes the next unread entry of
    `flatten_values`; a falsy flag yields None for that field.

    Args:
        flatten_values: Indexable sequence of values, consumed from index 0
          upwards in the iteration order of `self._masks`.

    Returns:
        dict: key is alias, value is `Data`.
    """
    data_dict = {}
    next_index = 0  # position of the next unread entry in flatten_values

    for alias, masks in self._masks.items():
      fields = []
      for mask in sum(masks, []):
        if mask:
          fields.append(flatten_values[next_index])
          next_index += 1
        else:
          fields.append(None)
      ints, floats, strings, labels, weights, ids, dst_ids, offsets = fields
      data_dict[alias] = Data(
        ids, ints, floats, strings, labels, weights,
        offsets=offsets, dst_ids=dst_ids)
    return data_dict
Esempio n. 8
0
 def transform(self, transform_func=None):
     """Transforms this `BatchGraph` by encoding node features.

     The node ids and attributes are wrapped in a `Data` object and passed
     through a `FeatureHandler` built from `self.node_schema`. The result
     replaces the nodes in a new `BatchGraph`. When `self.node_schema` is
     None the graph is returned unchanged.

     Args:
       transform_func: A function that takes in an `BatchGraph` object
         and returns a transformed version. NOTE: this argument is
         accepted but not used by the current implementation.
     Returns:
       `self` when there is no node schema, otherwise a new `BatchGraph`
       that also carries over every entry listed in `self.additional_keys`.
     """
     if self.node_schema is None:
         return self
     # TODO(baole): supports heterogeneous grpah.
     schema_name = self.node_schema[0]
     feature_spec = self.node_schema[1].feature_spec
     vertex_handler = FeatureHandler(schema_name, feature_spec)
     raw_nodes = Data(self.nodes.ids, self.nodes.int_attrs,
                      self.nodes.float_attrs, self.nodes.string_attrs)
     encoded_nodes = vertex_handler.forward(raw_nodes)
     transformed = BatchGraph(self.edge_index,
                              encoded_nodes,
                              self.node_schema,
                              self.graph_node_offsets,
                              additional_keys=self.additional_keys)
     # Copy the extra per-graph entries across to the new instance.
     for key in self.additional_keys:
         transformed[key] = self[key]
     return transformed
Esempio n. 9
0
    def test_only_floats(self):
        """Checks that a dense-only `FeatureHandler` returns its input.

        Ten dense features are registered. For a batch of two rows, the
        output must have the same shape as the input and every output row
        must equal the corresponding input row.
        """
        num_features = 10
        spec = FeatureSpec(num_features)
        for _ in range(num_features):
            spec.append_dense()

        handler = FeatureHandler("only_floats", spec)
        self.assertEqual(len(handler._float_fg), 10)
        self.assertEqual(len(handler._int_fg), 0)
        self.assertEqual(len(handler._fused_int_fg), 0)
        self.assertEqual(len(handler._string_fg), 0)

        # One [i, 2i] pair per feature; transposing yields shape [2, 10].
        feature_pairs = [[1.0 * i, 2.0 * i] for i in range(num_features)]
        batch_floats = np.array(feature_pairs).transpose()
        float_tensor = tf.convert_to_tensor(batch_floats, dtype=tf.float32)
        input_data = Data(floats=float_tensor)
        output = handler(input_data)  # [2, 10]

        with tf.Session() as sess:
            sess.run(tf.local_variables_initializer())
            sess.run(tf.global_variables_initializer())
            ret = sess.run(output)
            self.assertListEqual(list(ret.shape), list(batch_floats.shape))
            # The shapes were just asserted equal, so zip pairs every row.
            for actual_row, expected_row in zip(ret, batch_floats):
                self.assertListEqual(list(actual_row), list(expected_row))