def _compute_vert_context_soft(self, edge_factor, vert_factor, reuse=False):
    """Attention-based vertex (node) message pooling.

    NOTE(review): a method with this exact name and identical behavior is
    defined again later in this file; that later definition shadows this one.
    """
    # Edge factors on the outgoing / incoming side of each vertex-edge pair.
    out_edge = utils.pad_and_gather(edge_factor, self.edge_pair_mask_inds[:, 0])
    in_edge = utils.pad_and_gather(edge_factor, self.edge_pair_mask_inds[:, 1])

    # Vertex factor repeated per edge pair, aligned with the gathers above.
    gathered_verts = tf.gather(vert_factor, self.edge_pair_segment_inds)

    # Score each (edge message, vertex) pair with a 1-unit fc + sigmoid gate.
    out_input = tf.concat(concat_dim=1, values=[out_edge, gathered_verts])
    in_input = tf.concat(concat_dim=1, values=[in_edge, gathered_verts])
    (self.feed(out_input)
         .fc(1, relu=False, reuse=reuse, name='out_edge_w_fc')
         .sigmoid(name='out_edge_score'))
    (self.feed(in_input)
         .fc(1, relu=False, reuse=reuse, name='in_edge_w_fc')
         .sigmoid(name='in_edge_score'))

    # Apply the attention weights and sum the messages per vertex segment.
    weighted_out = tf.mul(out_edge, self.get_output('out_edge_score'))
    weighted_in = tf.mul(in_edge, self.get_output('in_edge_score'))
    return tf.segment_sum(weighted_out + weighted_in,
                          self.edge_pair_segment_inds)
def _compute_vert_context_fus(self, edge_factor, vert_factor, reuse=False):
    """Attention-based vertex (node) message pooling that additionally
    fuses messages from matched detections of the same object in other
    frames.
    """
    # Edge factors on the outgoing / incoming side of each vertex-edge pair.
    # pad_and_gather pads with a dummy edge so the index tensors stay dense.
    out_edge = utils.pad_and_gather(edge_factor, self.edge_pair_mask_inds[:, 0])
    in_edge = utils.pad_and_gather(edge_factor, self.edge_pair_mask_inds[:, 1])

    # Vertex factor per edge pair: edge_pair_segment_inds[k] is the vertex
    # (segment id) that edge_pair_mask_inds[k, :] belongs to.
    verts_per_edge = tf.gather(vert_factor, self.edge_pair_segment_inds)

    # Cross-frame fusion inputs: features of the same object matched in
    # other frames, and the current vertex factor aligned to those matches.
    matched_verts = utils.pad_and_gather(vert_factor, self.obj_fus_mask_inds[:])
    verts_per_match = tf.gather(vert_factor, self.obj_fus_segment_inds)

    # Compatibility inputs: each message concatenated with its target vertex.
    out_input = tf.concat(concat_dim=1, values=[out_edge, verts_per_edge])
    in_input = tf.concat(concat_dim=1, values=[in_edge, verts_per_edge])
    vid_input = tf.concat(concat_dim=1, values=[matched_verts, verts_per_match])

    # One scalar attention score per message via fc(1) + sigmoid.
    (self.feed(out_input)
         .fc(1, relu=False, reuse=reuse, name='out_edge_w_fc')
         .sigmoid(name='out_edge_score'))
    (self.feed(in_input)
         .fc(1, relu=False, reuse=reuse, name='in_edge_w_fc')
         .sigmoid(name='in_edge_score'))
    (self.feed(vid_input)
         .fc(1, relu=False, reuse=reuse, name='vid_vert_w_fc')
         .sigmoid(name='vid_vert_score'))

    # Weight each message by its computed score.
    weighted_out = tf.mul(out_edge, self.get_output('out_edge_score'))
    weighted_in = tf.mul(in_edge, self.get_output('in_edge_score'))
    weighted_vid = tf.mul(matched_verts, self.get_output('vid_vert_score'))

    # Per-vertex context: pooled in/out edge messages plus pooled
    # cross-frame messages.
    return (tf.segment_sum(weighted_out + weighted_in,
                           self.edge_pair_segment_inds)
            + tf.segment_sum(weighted_vid, self.obj_fus_segment_inds))
def _compute_vert_context_soft(self, edge_factor, vert_factor, reuse=False):
    """Attention-based vertex (node) message pooling.

    NOTE(review): this redefines a method of the same name declared earlier
    in the file with identical behavior; this later definition is the one
    that takes effect.
    """
    out_edge = utils.pad_and_gather(edge_factor, self.edge_pair_mask_inds[:, 0])
    in_edge = utils.pad_and_gather(edge_factor, self.edge_pair_mask_inds[:, 1])

    # Vertex factor of the receiving node, one row per edge pair.
    vert_rows = tf.gather(vert_factor, self.edge_pair_segment_inds)

    out_pair = tf.concat(concat_dim=1, values=[out_edge, vert_rows])
    in_pair = tf.concat(concat_dim=1, values=[in_edge, vert_rows])

    # Compatibility score in (0, 1) for each edge message.
    (self.feed(out_pair)
         .fc(1, relu=False, reuse=reuse, name='out_edge_w_fc')
         .sigmoid(name='out_edge_score'))
    (self.feed(in_pair)
         .fc(1, relu=False, reuse=reuse, name='in_edge_w_fc')
         .sigmoid(name='in_edge_score'))
    out_score = self.get_output('out_edge_score')
    in_score = self.get_output('in_edge_score')

    # Attention-weighted sum of incoming and outgoing edge messages,
    # pooled per vertex segment.
    pooled = tf.mul(out_edge, out_score) + tf.mul(in_edge, in_score)
    return tf.segment_sum(pooled, self.edge_pair_segment_inds)
def _compute_vert_context_hard(self, edge_factor, vert_factor, reduction_mode='max'):
    """Hard (max or average) message pooling over each vertex's edges.

    `reduction_mode` is forwarded to utils.padded_segment_reduce
    ('max' by default).
    """
    gathered_edges = utils.pad_and_gather(edge_factor, self.edge_mask_inds, None)
    num_verts = vert_factor.get_shape()[0]
    return utils.padded_segment_reduce(gathered_edges,
                                       self.edge_segment_inds,
                                       num_verts,
                                       reduction_mode)
def _compute_edge_context_3d_fus(self, vert_factor, edge_factor, reuse=False):
    """Attention-based edge message pooling with cross-frame fusion.

    Produces the edge context fed to the edge RNN update. Combines three
    gated messages per relation -- subject vertex, object vertex, and an
    encoding of the relation geometry -- plus messages pooled from the
    same relation matched in other frames.

    Args:
        vert_factor: per-vertex hidden states (GRU cell outputs).
        edge_factor: per-edge (relation) hidden states (GRU cell outputs).
        reuse: whether to reuse the fc-layer variables.

    Returns:
        A tensor of per-edge context vectors.
    """
    # Subject/object vertex features for each relation, concatenated
    # pairwise along dim 1 (one row per relation).
    vert_pairs = utils.gather_vec_pairs(vert_factor, self.relations)
    geo_pairs = utils.gather_vec_pairs(self.geo_state, self.relations)
    # Expose the raw (un-normalized) geometry pairs on the instance.
    self.geo_pairs = geo_pairs

    # Cross-frame fusion: the same relation's edge factor in every other
    # frame, and the current edge factor aligned to those matches.
    edge_factor_matched = utils.pad_and_gather(edge_factor,
                                               self.rel_fus_mask_inds[:])
    edge_factor_gathered_vid = tf.gather(edge_factor,
                                         self.rel_fus_segment_inds)
    vid_edge_w_input = tf.concat(
        concat_dim=1, values=[edge_factor_matched, edge_factor_gathered_vid])

    # Split the pairwise concatenation back into subject / object halves.
    sub_vert, obj_vert = tf.split(split_dim=1, num_split=2, value=vert_pairs)
    # Bug fix: removed a dead `tf.split` of geo_pairs whose outputs
    # (sub_geo, obj_geo) were never used, plus leftover commented-out code;
    # geometry is consumed whole below, after normalization.

    sub_vert_w_input = tf.concat(concat_dim=1, values=[sub_vert, edge_factor])
    obj_vert_w_input = tf.concat(concat_dim=1, values=[obj_vert, edge_factor])

    # Compatibility scores: fc(1) + sigmoid gate per message source.
    (self.feed(sub_vert_w_input)
         .fc(1, relu=False, reuse=reuse, name='sub_vert_w_fc')
         .sigmoid(name='sub_vert_score'))
    (self.feed(obj_vert_w_input)
         .fc(1, relu=False, reuse=reuse, name='obj_vert_w_fc')
         .sigmoid(name='obj_vert_score'))

    # Encode the relation geometry: normalize, then a 2-layer MLP
    # (100 -> 512 units, sigmoid activations).
    geo_pairs = self.norm_geo_pairs(geo_pairs, sc=500)
    (self.feed(geo_pairs)
         .fc(100, relu=False, reuse=reuse, name='geo_fc1_obj')
         .sigmoid(name='geo_encoded_pre')
         .fc(512, relu=False, reuse=reuse, name='geo_fc2_obj')
         .sigmoid(name='geo_encoded'))
    geo_encoded = self.get_output('geo_encoded')

    # Gate the geometry encoding against the current edge factor.
    geo_encoded_w_input = tf.concat(concat_dim=1,
                                    values=[geo_encoded, edge_factor])
    (self.feed(geo_encoded_w_input)
         .fc(1, relu=False, reuse=reuse, name='geo_w_fc')
         .sigmoid(name='geo_vert_score'))

    # Gate the cross-frame edge messages.
    (self.feed(vid_edge_w_input)
         .fc(1, relu=False, reuse=reuse, name='vid_edge_w_fc')
         .sigmoid(name='vid_edge_score'))

    sub_vert_w = self.get_output('sub_vert_score')
    obj_vert_w = self.get_output('obj_vert_score')
    geo_vert_w = self.get_output('geo_vert_score')
    vid_edge_w = self.get_output('vid_edge_score')

    # Weighted sum of the three per-edge messages, plus cross-frame
    # messages pooled per relation segment.
    weighted_sub = tf.mul(sub_vert, sub_vert_w)
    weighted_obj = tf.mul(obj_vert, obj_vert_w)
    weighted_geo = tf.mul(geo_encoded, geo_vert_w)
    vid_edge_weighted = tf.mul(edge_factor_matched, vid_edge_w)
    return weighted_sub + weighted_obj + weighted_geo + tf.segment_sum(
        vid_edge_weighted, self.rel_fus_segment_inds)