Example #1
from mxnet import nd


def sc(X, W, attr):
    # Rebuild a grouped convolution channel by channel: each output channel is
    # the nd.add_n sum of single-channel convolutions over its group's inputs.
    xshp, wshp = X.shape, W.shape
    C, OC, IC = xshp[1], wshp[0], wshp[1]
    assert C >= IC and C % IC == 0 and C // IC == eval(attr['num_group'])
    num_group = C // IC
    assert num_group == eval(attr['num_group']) and \
        OC >= num_group and OC % num_group == 0
    xs = sym_slice(X, 1, 1)      # per-channel slices of the input
    ws = kernel_slice_2d(W)      # per-(filter, channel) slices of the weights
    OPG = OC // num_group        # output channels per group
    nattr = attr.copy()
    nattr['num_group'] = '1'
    nattr['num_filter'] = '1'
    nodes = []
    for o in range(OC):
        nnodes = []
        j = (o // OPG) * IC      # first input channel of this output's group
        for i in range(IC):
            xoi, woi = xs[i+j], ws[o][i]
            yoi = nd.Convolution(xoi, woi, **nattr)
            nnodes.append(yoi)
        if len(nnodes) > 1:
            zi = nd.add_n(*nnodes)
        else:
            zi = nnodes[0]
        nodes.append(zi)
    return nd.concat(*nodes, dim=1)
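The helpers sym_slice and kernel_slice_2d above come from the example's own project and are not shown. The following standalone sketch (an assumption-laden rewrite, not the original helpers) rebuilds the same decomposition with plain slicing and checks it against a grouped nd.Convolution:

from mxnet import nd

X = nd.random.uniform(shape=(1, 4, 8, 8))   # NCHW input, C = 4
W = nd.random.uniform(shape=(6, 2, 3, 3))   # OC = 6, IC = 2 -> num_group = 2
ref = nd.Convolution(X, W, kernel=(3, 3), num_filter=6, num_group=2, no_bias=True)

OC, IC = W.shape[0], W.shape[1]
num_group = X.shape[1] // IC
OPG = OC // num_group
outs = []
for o in range(OC):
    base = (o // OPG) * IC                   # first input channel of o's group
    parts = [nd.Convolution(X[:, base + i:base + i + 1], W[o:o + 1, i:i + 1],
                            kernel=(3, 3), num_filter=1, no_bias=True)
             for i in range(IC)]
    outs.append(nd.add_n(*parts) if len(parts) > 1 else parts[0])
print(nd.abs(nd.concat(*outs, dim=1) - ref).max())  # ~0 up to float rounding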
Example #2
from mxnet import nd


def get_global_norm(arrays):
    # Global L2 norm over a list of NDArrays (e.g. all gradients of a model):
    # nd.add_n sums the squared norms of the flattened arrays before the sqrt.
    ctx = arrays[0].context
    total_norm = nd.add_n(*[
        nd.dot(x, x).as_in_context(ctx)
        for x in (arr.reshape((-1, )) for arr in arrays)
    ])
    total_norm = nd.sqrt(total_norm).asscalar()
    return total_norm
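A small usage sketch with hypothetical values; the result is the L2 norm over all elements of all arrays, as used e.g. for gradient clipping:

from mxnet import nd

arrays = [nd.array([[3.0, 4.0]]), nd.array([12.0])]
print(get_global_norm(arrays))   # sqrt(3^2 + 4^2 + 12^2) = 13.0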
Example #3
from mxnet import nd


def sc(X, W, attr, ichannel, step):
    # Slice the input and the weights along axis `ichannel` in chunks of
    # `step`, convolve matching slices, and reduce the partial results
    # with nd.add_n.
    xshp = X.shape
    xs = sym_slice(X, ichannel, step)
    ws = sym_slice(W, ichannel, step)
    nodes = []
    j = 0
    for i in range(0, xshp[ichannel], step):
        yi = nd.Convolution(xs[j], ws[j], **attr)
        nodes.append(yi)
        j += 1
    return nd.add_n(*nodes)
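sym_slice is again project-specific. A self-contained sketch of the same reduction, assuming plain channel slicing in place of that helper, can be checked against a full nd.Convolution (convolution is linear in its input channels, so the partial results sum to the full output):

from mxnet import nd

X = nd.random.uniform(shape=(1, 4, 8, 8))
W = nd.random.uniform(shape=(5, 4, 3, 3))
ref = nd.Convolution(X, W, kernel=(3, 3), num_filter=5, no_bias=True)

step = 2
parts = [nd.Convolution(X[:, i:i + step], W[:, i:i + step],
                        kernel=(3, 3), num_filter=5, no_bias=True)
         for i in range(0, X.shape[1], step)]
print(nd.abs(nd.add_n(*parts) - ref).max())  # ~0 up to float rounding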
Example #4
    def forward(self, input_vec, loss=None):
        # Split the flat input vector into per-slot segments plus a global
        # segment, push them through the shared hidden layers, and combine the
        # per-slot outputs into one action distribution and a state value.
        assert input_vec.shape[1] == self.input_dimension

        # get inputs for every slot (including global)
        inputs = {}
        for slot in self.slots:
            inputs[slot] = input_vec[:, self.slot_dimension[slot][0]:self.slot_dimension[slot][1]]
        input_global = []
        for seg in self.global_dimension:
            input_global.append(input_vec[:, seg[0]:seg[1]])
        inputs['global'] = nd.concat(*input_global, dim=1)

        layer = []
        # inputs -> first_hidden_layer
        if (not self.sort_input_vec) and self.state_feature != 'dip':
            layer.append([])
            for slot in self.slots:
                layer[0].append(self.input_trans[slot](inputs[slot]))
            layer[0].append(self.input_trans['global'](inputs['global']))
        elif self.state_feature == 'dip':
            sorted_inputs = []
            for slot in self.slots:
                sorted_inputs.append(inputs[slot])
            sorted_inputs.append(inputs['global'])
            layer.append(self.input_trans(sorted_inputs, loss))
        elif self.sort_input_vec:
            sorted_inputs = []
            for slot in self.slots:
                # sort slot features (excluding the last two columns) in
                # descending order, then pad or clip them to a width of 20
                tmp = inputs[slot][:, :-2].sort(is_ascend=False)
                if tmp.shape[1] < 20:
                    tmp = nd.concat(tmp, nd.zeros((tmp.shape[0], 20 - tmp.shape[1]), ctx=CTX), dim=1)
                else:
                    tmp = nd.slice_axis(tmp, axis=1, begin=0, end=20)
                sorted_inputs.append(nd.concat(tmp, inputs[slot][:, -2:], dim=1))
            sorted_inputs.append(inputs['global'])
            layer.append(self.input_trans(sorted_inputs, loss))

        # hidden_layers
        for i in range(self.hidden_layers - 1):
            if self.recurrent_mode is False:
                # equal to 'layer.append(self.ma_trans[i](layer[-1], loss))'
                layer.append(self.ma_trans[i](layer[i], loss))
            else:
                layer.append(self.ma_trans(layer[i], loss))

        if self.share_last_layer is False:
            # dropout of last hidden layer
            for j in range(len(self.slots)):
                layer[-1][j] = self.local_out_drop_op(layer[-1][j])
            layer[-1][-1] = self.global_out_drop_op(layer[-1][-1])

            # last_hidden_layer -> outputs
            outputs = []
            slotv_probs = []
            slot_hidden_value = []
            tmp_ave = nd.zeros_like(layer[-1][0])
            for i in range(len(self.slots) + 1):
                if self.use_dueling is False:
                    outputs.append(self.output_trans[i](layer[-1][i]))
                else:
                    if i < len(self.slots):
                        cur_slot_prob = self.output_trans_local_slotP(layer[-1][i])
                        cur_slot_v = self.output_trans_local_value(sorted_inputs[i])
                    else:
                        cur_slot_prob = self.output_trans_global_slotP(layer[-1][i])
                        cur_slot_v = self.output_trans_global_value(sorted_inputs[i])
                    
                    slotv_probs.append(cur_slot_prob)
                    slot_hidden_value.append(cur_slot_v)

            batch_slotv_prob = nd.softmax(nd.concat(*slotv_probs, dim=1))
            batch_value = nd.squeeze(self.output_trans_value(nd.add_n(*slot_hidden_value)))

        return batch_slotv_prob, batch_value
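The output combination at the end of this forward pass can be sketched standalone with hypothetical shapes; nd.sum stands in for the learned self.output_trans_value head, which is not shown here:

from mxnet import nd

batch, n_slots, acts_per_slot, hidden = 2, 3, 4, 8
slotv_probs = [nd.random.uniform(shape=(batch, acts_per_slot)) for _ in range(n_slots)]
slot_hidden_value = [nd.random.uniform(shape=(batch, hidden)) for _ in range(n_slots)]

batch_slotv_prob = nd.softmax(nd.concat(*slotv_probs, dim=1))     # (2, 12)
pooled = nd.add_n(*slot_hidden_value)                             # (2, 8)
# nd.sum is a stand-in for the learned value head
batch_value = nd.squeeze(nd.sum(pooled, axis=1, keepdims=True))   # (2,)
print(batch_slotv_prob.shape, batch_value.shape)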